Example #1
import logging
from flask import Flask

from twisted.internet import reactor
from flask_script import Manager  # the flask.ext namespace was removed; import flask_script directly

from core.config import setup_config, config
from core.utils.init import home_dir, useradd
from core.logger import setup_logging

setup_config('%s/config.py' % home_dir())

from core.utils import change_user_vmmaster

setup_logging(
    log_type=getattr(config, "LOG_TYPE", None),
    log_level=getattr(config, "LOG_LEVEL", None)
)
app = Flask(__name__)
manager = Manager(app)
log = logging.getLogger(__name__)


@manager.command
def runserver():
    """
    Run server
    """
    from vmmaster.server import VMMasterServer
    VMMasterServer(reactor, config.PORT).run()
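
The example stops after defining the runserver command; with Flask-Script, such a module typically ends by handing control to the manager, roughly as sketched here (an assumption, not part of the original source):

if __name__ == '__main__':
    # Flask-Script parses sys.argv and dispatches to the registered
    # @manager.command functions, so "runserver" above becomes a CLI subcommand.
    manager.run()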

Example #2
import logging
import os
import sys

#===============================================================================
# Add the project root (two levels up from this file) to sys.path so that the
# project packages (constants, core, db) can be imported.

FILE_PATH = os.path.realpath(__file__)
DIR_PATH, _ = os.path.split(FILE_PATH)
PATH = os.path.abspath(os.path.join(DIR_PATH, '..', '..'))
sys.path.append(PATH)
#===============================================================================
import constants
from core import logger
from core import cleanser
from core import data_updator
from db import db_session

logger.setup_logging()
LOGGER = logging.getLogger(__file__)


class AppManager:
    """AppManager loads after-appointments for patients based on criteria
    provided through parameters to the constructor.
    """
    def __init__(self, **kwargs):
        self.parse_data_option = kwargs[constants.PARSE_DATA_KEY]
        self.update_data_option = kwargs[constants.UPDATE_DATA_KEY]

    def run(self):
        ...
Example #3
    def create_local_tree(self, DG, tree, ref_node):
        """ Builds tree recursively. Uses graph data structure but enforces tree to simplify downstream."""
        nodes = DG.predecessors(ref_node)
        tree.add_node(ref_node, name=DG.nodes[ref_node])
        for item in nodes:
            if not tree.has_node(item):
                tree.add_edge(ref_node, item)
                tree.add_node(item, name=DG.nodes[item])
                self.create_local_tree(DG, tree, item)
        return tree

    def get_leafs(self, tree, leafs):
        """Recursive function to extract all leafs in order out of tree.
        Each pass, jobs are moved from "tree" to "leafs" variables until done.
        """
        cur_leafs = [
            node for node in tree.nodes()
            if tree.in_degree(node) != 0 and tree.out_degree(node) == 0
        ]
        leafs += cur_leafs

        for leaf in cur_leafs:
            tree.remove_node(leaf)

        if len(tree.nodes()) >= 2:
            self.get_leafs(tree, leafs)
        return leafs + list(tree.nodes())


logger = log.setup_logging('Job')
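
Taken together, create_local_tree and get_leafs turn a dependency DAG into a dependencies-first ordering of the jobs that ref_node relies on. A minimal sketch of how a comparable ordering can be obtained with networkx built-ins, on a made-up DAG where an edge u -> v is assumed to mean "u must run before v":

import networkx as nx

# Hypothetical dependency DAG: an edge u -> v means "u must run before v".
DG = nx.DiGraph()
DG.add_edges_from([('extract', 'clean'), ('clean', 'report'), ('lookup', 'report')])

ref_node = 'report'
# Keep ref_node plus everything it transitively depends on, then sort so
# that every dependency comes before the job that needs it.
relevant = nx.ancestors(DG, ref_node) | {ref_node}
order = list(nx.topological_sort(DG.subgraph(relevant)))
print(order)  # e.g. ['extract', 'lookup', 'clean', 'report']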
Example #4
        if result.note:
            return {
                "note":
                api_candidate.get_note_on(candidate_id=result.candidate)
            }
        if result.company_count:
            return api_candidate.set_company_count(
                candidate_id=result.candidate,
                company_counts=result.company_count)
        if result.industries:
            return api_candidate.set_industries(candidate_id=result.candidate,
                                                industries=result.industries)
        if result.expertise:
            return api_candidate.set_functional_expertise(
                candidate_id=result.candidate, expertises=result.expertise)
        if result.sub:
            return api_candidate.set_sub_functional_expertise(
                candidate_id=result.candidate,
                functional_expertise_id=result.functional_id,
                expertises=result.sub)


if __name__ == '__main__':
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    parser = MyParser(add_help=True)
    parser.initialize()
    result = parser.validate()
    setup_logging(scrnlog=result.verbose)
    print(result)
    print(main(result=result))
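
MyParser itself is not shown; a rough argparse-based sketch of the options this dispatcher appears to expect, with every flag name inferred from the result attributes used above (purely illustrative, not the project's parser):

import argparse

parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('--candidate', type=int, help='candidate id the other options act on')
parser.add_argument('--note', action='store_true', help='fetch the note for the candidate')
parser.add_argument('--company-count', dest='company_count', help='set the company count')
parser.add_argument('--industries', nargs='*', help='set industries')
parser.add_argument('--expertise', nargs='*', help='set functional expertise')
parser.add_argument('--sub', nargs='*', help='set sub-functional expertise')
parser.add_argument('--functional-id', dest='functional_id', help='parent functional expertise id')
parser.add_argument('--verbose', action='store_true', help='log to screen')
result = parser.parse_args()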
Example #5
        logger.info('Deleted aws secret, secret_id:' + eu.AWS_SECRET_ID)
        print('delete_secret response: {}'.format(response))


def terminate(error_message=None):
    """
    Method to exit the Python script. It will log the given message and then exit().
    :param error_message:
    """
    if error_message:
        logger.error(error_message)
    logger.critical('The script is now terminating')
    exit()


logger = log.setup_logging('Deploy')

if __name__ == "__main__":
    # Use as a standalone to push an arbitrary python script to the cluster.
    # TODO: fails to create a new cluster but works to add a step to an existing cluster.
    print('command line: ', ' '.join(sys.argv))
    job_name = sys.argv[1] if len(sys.argv) > 1 else 'examples/ex1_raw_job_cluster.py'  # TODO: move to 'jobs/examples/ex1_raw_job_cluster.py'

    class bag(object):
        pass

    yml = bag()
    yml.job_name = job_name
    yml.py_job = job_name  # will add /home/hadoop/app/  # TODO: try later as better from cmdline.
Example #6
    for field, required_type in output_types.items():
        spark_type = get_spark_type(field, required_type)
        spark_types.append(spark_type)

    spark_schema = spk_types.StructType(spark_types)
    logger.info('spark_schema: {}'.format(spark_schema))
    return spark_schema


def pdf_to_sdf(
    df, output_types, sc, sc_sql
):  # TODO: check suspicion that this leads to each node requiring loading all libs from this script.
    spark_schema = get_spark_types(output_types)
    missing_columns = set(df.columns) - set(output_types.keys())
    if missing_columns:
        logger.warning(
            'Some fields from the source pandas df will not be pushed to the spark df '
            'because they are absent from output_types; check whether they need to be added: {}.'
            .format(missing_columns))

    recs = df.to_dict(orient='records')
    partitions = len(recs) // 1000  # integer division so numSlices receives an int
    partitions = partitions if partitions >= 1 else None
    # Push to spark. For easier testing of downstream casting (i.e. outside of spark): tmp = [cast_rec(row, output_types) for row in recs]
    rdd = sc.parallelize(recs, numSlices=partitions) \
            .map(lambda row: cast_rec(row, output_types))

    return sc_sql.createDataFrame(rdd, schema=spark_schema, verifySchema=True)


logger = log.setup_logging('DB_Utils')
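
The get_spark_type helper is not shown; for reference, the kind of StructType schema being logged above can also be written out directly with pyspark.sql.types. A minimal sketch with made-up field names, not the project's implementation:

from pyspark.sql import SparkSession
from pyspark.sql import types as spk_types

# Made-up output_types-style mapping, expressed directly as a spark schema.
spark_schema = spk_types.StructType([
    spk_types.StructField('session_id', spk_types.StringType(), True),
    spk_types.StructField('count_events', spk_types.IntegerType(), True),
])

spark = SparkSession.builder.master('local[1]').appName('schema_demo').getOrCreate()
sdf = spark.createDataFrame([('aaa', 10), ('bbb', 12)], schema=spark_schema)
sdf.printSchema()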
Example #7
# coding: utf-8
# author: [email protected]
# created: 2019-10-23 17:57:20
# modified: 2019-10-23 19:22:35
# =============================================================
r"""Test logging."""

from core.logger import setup_logging
from core.logger import get_logger
from core.utils import envs

if __name__ == "__main__":
    import os

    os.environ["LOG_CFG"] = "configs/logger_example.yaml"
    os.environ["LOG_NAME"] = "example"

    os.makedirs("./logs", exist_ok=True)

    log_name, log_cfg = envs("LOG_NAME", "LOG_CFG")

    setup_logging(log_cfg)
    logger = get_logger(log_name)

    logger.debug("debug")
    logger.info("info")
    logger.warning("warn")
    logger.error("error")
Example #8
import argparse
import logging
import os
import sys

from core.logger import LOGGER_NAME, setup_logging
from vincere import VincereAPI
from vincere.candidate import CandidateAPI

logger = logging.getLogger(LOGGER_NAME)


if __name__ == '__main__':
    setup_logging(scrnlog=True)
    api_candidate = CandidateAPI()
    print(api_candidate.client.get(url="https://headhuntr.vincere.io/api/v2/candidate/1004286/industries"))
    print(api_candidate.client.get(url="https://headhuntr.vincere.io/api/v2/candidate/1004286/functionalexpertises"))
    print(api_candidate.client.get(url="https://headhuntr.vincere.io/api/v2/candidate/1004286"))

    print(api_candidate._get_candidate_details(candidate_id=1004286))
Example #9
    logger.info(
        'Sending table "{}" to oracle, mode "{}", size "{}", and chunksize "{}".'
        .format(name_tb, if_exist, len(df), chunksize))
    df.to_sql(
        name=name_tb,
        con=connection,
        if_exists=if_exist,
        dtype=types,
        index=False,
        chunksize=chunksize
    )  # dtype is necessary to avoid type inference leading to CLOB types (which limits comparison with other strings and is very slow).
    # TODO: check df.to_sql above for long integers. Noticed long numbers were rounded.
    logger.info(
        "Copied table to oracle '{}', using connection profile '{}'".format(
            name_tb, connection_profile))


logger = log.setup_logging('Oracle')

if __name__ == '__main__':
    from sqlalchemy import types
    import pandas as pd
    data = [['aaa', 10], ['bbb', 12], ['ccc', 3]]
    df = pd.DataFrame(data, columns=['session_id', 'count_events'])
    types = {
        'session_id': types.VARCHAR(16),
        'count_events': types.Integer(),
    }
    connection_profile = 'some_connection_profile'
    name_tb = 'test_table'
    create_table(df, connection_profile, name_tb, types)
Example #10
import os
import logging
import logging.config
import logging.handlers
from core.logger import setup_logging

log = logging.getLogger(__name__)

DEBUG = False

# logging
LOG_TYPE = "plain"
LOG_LEVEL = "INFO"
LOGGING = setup_logging(log_type=LOG_TYPE, log_level=LOG_LEVEL)

SESSION_TIMEOUT = 60

# selenium
SELENIUM_PORT = 4455
VMMASTER_AGENT_PORT = 9000

STATIC_FOLDERS = 'worker/static'

RABBITMQ_USER = '******'
RABBITMQ_PASSWORD = '******'
RABBITMQ_HOST = 'mq1.prod.test'
RABBITMQ_PORT = 5672
RABBITMQ_COMMAND_QUEUE = "vmmaster_commands"
RABBITMQ_SESSION_QUEUE = "vmmaster_session"
RABBITMQ_HEARTBEAT = 10
RABBITMQ_REQUEST_TIMEOUT = 60
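
Example #1 loads a module like this one via setup_config('%s/config.py' % home_dir()). The mechanism is not shown, but a config .py file can be loaded by path with importlib, roughly as sketched below (an illustration under that assumption, not vmmaster's actual setup_config):

import importlib.util
import tempfile


def load_config(path):
    # Load a config .py file by path and return it as a module object,
    # so settings are read as attributes (e.g. config.LOG_LEVEL, config.SESSION_TIMEOUT).
    spec = importlib.util.spec_from_file_location('config', path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


with tempfile.NamedTemporaryFile('w', suffix='.py', delete=False) as f:
    f.write('LOG_LEVEL = "INFO"\nSESSION_TIMEOUT = 60\n')

config = load_config(f.name)
print(config.LOG_LEVEL, config.SESSION_TIMEOUT)  # INFO 60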
Example #11
    logger.info(
        'Sending table "{}" to redshift in schema "{}", mode "{}", size "{}", and chunksize "{}".'
        .format(name_tb, schema, if_exist, len(df), chunksize))
    df.to_sql(name=name_tb,
              schema=schema,
              con=connection,
              if_exists=if_exist,
              dtype=types,
              index=False,
              chunksize=chunksize)
    # TODO: check df.to_sql above for long integers. Noticed long numbers were rounded.
    logger.info(
        "Copied table to redshift '{}.{}', using connection profile '{}'".
        format(schema, name_tb, connection_profile))


logger = log.setup_logging('Redshift')

if __name__ == '__main__':
    from sqlalchemy import types
    import pandas as pd
    data = [['aaa', 10], ['bbb', 12], ['ccc', 3]]
    df = pd.DataFrame(data, columns=['session_id', 'count_events'])
    types = {
        'session_id': types.VARCHAR(16),
        'count_events': types.Integer(),
    }
    connection_profile = 'some_connection_profile'
    name_tb = 'test_table'
    create_table(df, connection_profile, name_tb, types)