Ejemplo n.º 1
0
def create_app():
    app = Flask(__name__, template_folder='templates')
    CORS(app)
    CSRFProtect(app)
    config.init(os.environ['CONFIG_PATH'])
    app.config['SERVER_NAME'] = config.config['server_name']
    app.config['SECRET_KEY'] = config.config['secret_key']
    app.config['SQLALCHEMY_DATABASE_URI'] = config.config['db_url']
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
    app.config['SQLALCHEMY_ECHO'] = app.debug
    app.config.update(**config.config['mail'])
    app.config['MAIL_USERNAME'] = os.environ['APP_MAIL_USERNAME']
    app.config['MAIL_PASSWORD'] = os.environ['APP_MAIL_PASSWORD']
    login_manager.init_app(app)
    db.init_app(app)
    make_searchable(db.metadata)
    mail.init_app(app)
    migrate.init_app(app, db)
    from .auth import auth
    app.register_blueprint(auth, url_prefix='/auth')
    from .main import main
    app.register_blueprint(main, url_prefix='/')
    from .api import api
    app.register_blueprint(api, url_prefix='/api')

    return app
Ejemplo n.º 2
0
    def test_send_to_csv_data_topic(self):
        config.init()

        n = randint(1, 1001)
        key = str.format('key-test-{}', n)
        data = str.format('data-test-{}', n)

        engine = kafka_engine.KafkaEngine()
        engine.send_to_csv_data_topic(key.encode(), data.encode())
        engine.close()
        assert True
Ejemplo n.º 3
0
def create_app():
    app = Flask(__name__)

    config.init(app)
    cli.init(app)
    api.init(app)
    db.init(app)

    CORS(app)

    return app
Ejemplo n.º 4
0
def main():
    config.init()
    print("This is the entrypoint to the game importer")
    importer = GameImporter()
    if len(sys.argv) == 1:
        importer.import_from_picked_dir()
    elif len(sys.argv) == 2:
        if sys.argv[1] == "-f":
            importer.import_from_picked_file()
    elif len(sys.argv) == 3:
        if sys.argv[1] == "-f":
            importer.import_from_file(sys.argv[2])
Ejemplo n.º 5
0
def lambda_handler(event, context):
    config.init()

    record = event['Records'][0]
    bucket = record['s3']['bucket']['name']
    key = record['s3']['object']['key']
    print('bucket:{} key:{}'.format(bucket, key))

    input_file = path.join(bucket, key)
    with s3_client.open(input_file, 'r', newline='',
                        encoding='utf-8-sig') as file:
        process_file(file)
Ejemplo n.º 6
0
def main():
    config.init()
    print("This is the entrypoint to the application")
    app = App()
    app.run()
Ejemplo n.º 7
0
    def app_flow(self):
        # This method contains a state machine for the client and coordinator instance
        # Coordinator Workflow: 1 -> 2 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9
        # Client Workflow:      1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 8 -> 9

        # === States ===
        state_initialize = 1
        state_read_config = 2
        state_wait_for_config = 3
        state_read_input = 4
        state_local_computation = 5
        state_wait_for_global_aggregation = 6
        state_global_aggregation = 7
        state_write_results = 8
        state_finish = 9

        # Initial state
        state = state_initialize
        self.progress = 'initializing'

        while True:

            # INITIALIZE THE WORKFLOW

            if state == state_initialize:
                if self.id is not None:  # Test if setup has happened already
                    config.init()  # initialize config dictionary
                    config.add_option('id', self.id)
                    config.add_option('is_coordinator', self.master)

                    # If config does not exist, wait for correct input in the frontend
                    if check_if_config_file_exists():
                        self.progress = 'parsing config file'
                        read_config(self.master)
                        self.create_splits()
                    else:
                        print(
                            '[IO] No config file found. Waiting for user input in the FrontEnd...'
                        )
                        config_is_valid = False
                        while config_is_valid is False:
                            config.add_option('input_form', False)
                            self.progress = 'getting config frontend'
                            while config.get_option('input_form') is False:
                                time.sleep(10)

                            self.progress = 'parsing config frontend'
                            print(
                                '[IO] Received FrontEnd Input Form. Continuing GrandForest workflow...'
                            )
                            read_config_from_frontend(
                                self.master, config.get_option('input_form'))
                            self.create_splits()
                            if check_config(self.split_expression_data):
                                config_is_valid = True

                    self.local_models = dict.fromkeys(
                        self.split_expression_data.keys())

                    # create temp directory for python <-> R data exchange
                    # TODO create RAMDISK instead?
                    try:
                        os.makedirs(config.get_option('TEMP_DIR'))
                    except OSError as e:
                        if e.errno != errno.EEXIST:
                            print(
                                f'[CRIT] Could not create temporary directory',
                                flush=True)
                            raise

                # Set Expression Data Sample Size for Model Balancing
                if self.master:
                    self.data_incoming.append([
                        self.id,
                        get_input_filesizes(self.split_expression_data)
                    ])
                    state = state_read_config
                else:
                    self.data_outgoing = json.dumps([
                        self.id,
                        get_input_filesizes(self.split_expression_data)
                    ])
                    self.status_available = True
                    state = state_wait_for_config

            # READ CONFIG AND SEND GLOBAL OPTIONS TO CLIENTS

            if state == state_read_config:
                self.progress = 'sending config'

                # Prepare and Send global options from the configuration to all clients
                #  including balanced amount of trees to be trained
                if self.master:
                    print("[MASTER] Received Data from ",
                          len(self.data_incoming), " of ", str(self.clients),
                          "clients.")
                    if len(self.data_incoming) == len(self.clients):
                        print(
                            f'[CLIENT] Received all client expression data filesizes.',
                            flush=True)
                        filesizes_combined = dict()
                        for participant in self.data_incoming:
                            for split in self.split_expression_data.keys():
                                try:
                                    filesizes_combined[split]
                                except KeyError:
                                    filesizes_combined[split] = 0
                                filesizes_combined[split] = filesizes_combined[
                                    split] + participant[1][split]

                        num_trees_per_client_per_split = dict()
                        for participant in self.data_incoming:
                            for split in self.split_expression_data.keys():
                                try:
                                    num_trees_per_client_per_split[
                                        participant[0]]
                                except KeyError:
                                    num_trees_per_client_per_split[
                                        participant[0]] = dict()

                                try:
                                    num_trees_per_client_per_split[
                                        participant[0]][split]
                                except KeyError:
                                    num_trees_per_client_per_split[
                                        participant[0]][split] = 0
                                num_trees_per_client_per_split[
                                    participant[0]][split] = math.ceil(
                                        participant[1][split] /
                                        filesizes_combined[split] * int(
                                            config.get_option(
                                                'number_of_trees')))

                        self.interaction_network = read_input(
                            config.get_option('interaction_network_filepath'),
                            config.get_option('interaction_network_filename'),
                            config.get_option('interaction_network_separator'))
                        config.add_option(
                            'number_of_trees_per_split',
                            num_trees_per_client_per_split[self.id])
                        self.data_incoming = []

                        print(
                            f'[COORDINATOR] Sending interaction network to clients',
                            flush=True)
                        self.data_outgoing = json.dumps([
                            config.get_option('grandforest_method'),
                            config.get_option('grandforest_treetype'),
                            num_trees_per_client_per_split,
                            config.get_option('minimal_node_size'),
                            config.get_option('seed'), self.interaction_network
                        ])
                        self.status_available = True
                        state = state_read_input
                else:
                    state = state_wait_for_config

            # WAIT FOR CONFIG

            if state == state_wait_for_config:
                self.progress = 'gathering config'
                if len(self.data_incoming) > 0:
                    config.add_option('grandforest_method',
                                      self.data_incoming[0][0])
                    config.add_option('grandforest_treetype',
                                      self.data_incoming[0][1])
                    config.add_option('number_of_trees_per_split',
                                      self.data_incoming[0][2][self.id])
                    config.add_option('minimal_node_size',
                                      self.data_incoming[0][3])
                    config.add_option('seed', self.data_incoming[0][4])
                    self.interaction_network = self.data_incoming[0][5]
                    print(
                        f'[CLIENT] Received config and interaction network with size {sys.getsizeof(self.interaction_network)} Bytes from coordinator',
                        flush=True)
                    self.data_incoming = []
                    state = state_read_input

            # READ INPUT FILES IN R

            if state == state_read_input:
                for split in self.split_expression_data.keys():
                    self.split_expression_data[split] = read_input(
                        split + '/' +
                        config.get_option('expression_data_filename'),
                        config.get_option('expression_data_filename'),
                        config.get_option('expression_data_separator'))
                state = state_local_computation

            # COMPUTE LOCAL MODEL IN R

            if state == state_local_computation:
                self.progress = 'computing'

                # Check if config is valid
                #  this could be outsourced to io.py, since the frontend configuration is already checked there
                if config.get_option('grandforest_method') == "supervised":
                    if config.get_option('grandforest_treetype') == "survival":
                        try:
                            config.get_option('expression_data_survival_event')
                            config.get_option('expression_data_survival_time')
                        except KeyError:
                            print('[LOGIC] Config File Error.')
                            raise ValueError(
                                "The GrandForest Layout is invalid: survival time and/or event missing"
                            )
                        config.add_option(
                            'expression_data_dependent_variable_name', "None")
                    else:
                        try:
                            config.get_option(
                                'expression_data_dependent_variable_name')
                        except KeyError:
                            print('[LOGIC] Config File Error.')
                            raise ValueError(
                                "The GrandForest Layout is invalid: dependent variable name missing"
                            )
                        config.add_option('expression_data_survival_event',
                                          "None")
                        config.add_option('expression_data_survival_time',
                                          "None")

                for split in self.split_expression_data.keys():
                    self.local_models[split] = local_computation(
                        self.split_expression_data[split],
                        self.interaction_network, split)

                if self.master:
                    print(f'[COORDINATOR] Finished computing the local model',
                          flush=True)
                    self.client_models.append(self.local_models)
                else:
                    print(f'[CLIENT] Sending local model to master',
                          flush=True)
                    self.data_outgoing = json.dumps(self.local_models)
                    self.data_incoming = []
                    self.status_available = True

                state = state_wait_for_global_aggregation

            # WAIT FOR GLOBAL AGGREGATION

            if state == state_wait_for_global_aggregation:
                if self.master:
                    self.progress = 'gathering models'
                    if len(self.data_incoming) == len(self.clients) - 1:
                        print(
                            f'[COORDINATOR] Received local models from all clients',
                            flush=True)
                        self.client_models.extend(self.data_incoming)
                        self.data_incoming = []
                        state = state_global_aggregation
                else:
                    self.progress = 'gathering global model'
                    if len(self.data_incoming) > 0:
                        self.global_models = self.data_incoming[0]
                        self.data_incoming = []
                        print(f'[CLIENT] Received result from master',
                              flush=True)
                        state = state_write_results

            # GLOBAL AGGREGATION IN R

            if state == state_global_aggregation:
                self.progress = 'computing'
                for split in self.split_expression_data.keys():
                    self.global_models[split] = global_aggregation([
                        client_splits[split]
                        for client_splits in self.client_models
                    ])
                print(f'[COORDINATOR] Sending global model to clients',
                      flush=True)
                self.data_outgoing = json.dumps(self.global_models)
                self.status_available = True
                state = state_write_results

            # WRITE AND ANALYZE RESULTS IN R

            if state == state_write_results:
                self.progress = 'writing results'
                for split in self.split_expression_data.keys():
                    if config.get_option('prediction'):
                        local_prediction(self.global_models[split],
                                         self.split_expression_data[split],
                                         split.replace("/input/", "/output/"))
                    write_results(self.local_models[split],
                                  self.global_models[split],
                                  split.replace("/input/", "/output/"))
                    result_analysis(self.local_models[split],
                                    self.global_models[split],
                                    self.interaction_network,
                                    self.split_expression_data[split],
                                    split.replace("/input/", "/output/"))

                if self.master:
                    self.data_incoming = ['DONE']
                else:
                    self.data_outgoing = json.dumps('DONE')
                    self.status_available = True
                state = state_finish

            # FINISH THE WORKFLOW

            if state == state_finish:
                self.progress = 'finishing'
                if self.master:
                    if len(self.data_incoming) == len(self.clients):
                        # FINISH COORDINATOR
                        print(
                            f'[COORDINATOR] Finished the workflow, exiting...',
                            flush=True)
                        self.status_finished = True
                        break
                else:
                    # FINISH CIENT
                    print(f'[CLIENT] Finished the workflow, exiting...',
                          flush=True)
                    self.status_finished = True
                    break

            time.sleep(0.1)