Example #1
def return_download_path():
    filename = [file for file in os.listdir(download_dir) if Path(file).suffix not in unwanted_filetypes and 
                Path(file).stem == session['filename_stem']][0]

    filesize = round((os.path.getsize(os.path.join(download_dir, filename)) / 1_000_000), 2)
    update_database(filesize)

    # Remove any hashtags or percentage symbols, as they cause issues, and replace underscores with spaces to make the filename more readable.
    new_filename = filename.replace('#', '').replace('%', '').replace('_', ' ')

    try:
        # Rename the file.
        os.replace(os.path.join(download_dir, filename), os.path.join(download_dir, new_filename))
    except Exception as e:
        log.info(f'Unable to rename {filename} to {new_filename}:\n{e}')
        clean_up(Path(filename).stem)
    else:
        log.info(f'{new_filename} | {filesize} MB')
        clean_up(Path(new_filename).stem)

        # Update the list of videos downloaded.
        with open("logs/downloads.txt", "a") as f:
            f.write(f'\n{new_filename}')

        # Return the download link.
        return os.path.join('api', 'downloads', new_filename)
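Example #1 assumes module-level globals (download_dir, unwanted_filetypes, session, log) and a clean_up(stem) helper that disposes of leftover intermediate files once a download has been renamed. The helper below is only a hedged sketch of what such a function might look like under that assumption, not the project's actual implementation.

import os
from pathlib import Path

# Hypothetical settings mirroring the globals the example relies on.
download_dir = "downloads"
unwanted_filetypes = {".part", ".ytdl"}  # assumed extensions of intermediate files

def clean_up(filename_stem):
    # Remove leftover intermediate files whose stem matches the finished download.
    if not os.path.isdir(download_dir):
        return
    for file in os.listdir(download_dir):
        path = Path(file)
        if path.stem == filename_stem and path.suffix in unwanted_filetypes:
            os.remove(os.path.join(download_dir, file))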
Example #2
def main():

    # Experiment setup
    for name in ['JIGSAWS_K', 'JIGSAWS_N', 'JIGSAWS']:
        update_config_file(['dataset_name'], name)
        utils.set_up_dirs()
        utils.clean_up()
        experiment_tcn()
        experiment_trpo('full')
Example #3
    def run_reducer(self, **kwargs):
        """ Inside the run() function, apply the reducer to all of the mapped-aggregated result values. """
        if self.qsub is False:
            reducer = kwargs['reducer']
            mapped_results = kwargs['mapped_results']
            return reducer(mapped_results[0], parameters=self.parameters[0])
        else:
            import shutil
            import subprocess
            random_string = str(uuid.uuid4())
            temp_job_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tmpRed_" + random_string)

            # Create temp_job_directory.
            if not os.path.exists(temp_job_directory):
                os.makedirs(temp_job_directory)

            unpickled_inp = dict(mapped_results=kwargs['mapped_results'], parameters=self.parameters)

            input_file_path = os.path.join(temp_job_directory, constants.reduce_input_file_name)
            # Write input file
            with open(input_file_path, "wb") as input_file:
                cloudpickle.dump(unpickled_inp, input_file)

            # Copy input files to working directory.
            shutil.copyfile(kwargs['pickled_cluster_input_file'], os.path.join(temp_job_directory,
                                                                               constants.pickled_cluster_input_file))
            shutil.copyfile(input_file_path, os.path.join(temp_job_directory, constants.job_input_file_name))

            # Copy library scripts to working directory.
            shutil.copyfile(constants.parameter_sweep_run_reducer_shell_script,
                            os.path.join(temp_job_directory, os.path.basename(
                                constants.parameter_sweep_run_reducer_shell_script)))
            shutil.copyfile(constants.parameter_sweep_run_reducer_pyfile,
                            os.path.join(temp_job_directory, os.path.basename(
                                constants.parameter_sweep_run_reducer_pyfile)))

            reduce_script_file = os.path.join(temp_job_directory, os.path.basename(
                constants.parameter_sweep_run_reducer_shell_script))
            container_name = os.path.basename(temp_job_directory)

            # Invoke parameter_sweep_run_reducer.
            subprocess.call("bash {0} {1} {2}".format(reduce_script_file, container_name, temp_job_directory),
                            shell=True)

            self._wait_for_all_results_to_return([temp_job_directory])

            with open(os.path.join(temp_job_directory, constants.job_output_file_name), "r") as of:
                result = of.read()

            # Remove job directory and container.
            clean_up([temp_job_directory], [container_name])

            return result
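Examples #3, #8, #9 and #10 call a clean_up helper that takes a list of temporary job directories and a list of container names. A minimal sketch of such a helper, assuming the directories are removed from disk and the containers are Docker containers removed via the docker CLI, might look like this:

import shutil
import subprocess

def clean_up(dirs_to_delete, containers_to_delete):
    # Remove temporary job directories left over from the qsub jobs.
    for directory in dirs_to_delete:
        shutil.rmtree(directory, ignore_errors=True)
    # Remove the finished containers (assumption: Docker-managed containers).
    for container in containers_to_delete:
        subprocess.call("docker rm -f {0}".format(container), shell=True)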
Example #4
def place_trials_default(expnum, start_time, end_time, verbose=False):
    """This is going to be the primary way of moving processed data from it's proper location
    to the PEN tool's subfolder. As long as the data is organized with our standard format where
    the metadata is located on the mysql database, this will handle all the uploading.
    WARNING: Currently this will not realize if you've pointed it to a folder that it already uploaded."""
    destination = experiment_path[expnum]
    current_trial = utils.find_last_trial(expnum) + 1
    existing_evid_dict = caching.load_evid_dictionary(expnum)
    event_data_dicts = smysql.retrieve_event_description(start_time, end_time, list_of_sites=mySQL_sitedef[expnum])
    default_folder = smysql.retrieve_data_folder()

    # Look at every event in the database between time constraints.
    for event in event_data_dicts:
        site_evt_number = event[cfg_evt_siteEvt]
        site_evt_time = event[cfg_evt_time]
        site_event_id = event[cfg_evt_evid]
        site_event_dist = event[cfg_evt_dist]
        site_event_ml = event[cfg_evt_ml]
        file_data_dicts = smysql.retrieve_file_location(site_evt_number, mySQL_stadef[expnum])

        # If this event has already been uploaded, report it and skip this event.
        if site_event_id in existing_evid_dict.values():
            nees_logging.log_existing_evid(site_event_id)
            continue

        # Skip this event if there is no data.
        if not file_data_dicts:
            continue

        # Generate file structure on shttp and local system.
        description = utils.generate_description(event)
        trialtitle = datetime.datetime.utcfromtimestamp(site_evt_time).strftime(default_time_format)
        trial_doc_folder = "%sTrial-%s/Documentation/" % (destination, current_trial)
        report_source = "%sTrial-%s/Rep-1/%s/" % (destination, current_trial, cfg_hub_ext_fold[".txt"])
        report_name = "report.csv"
        readme_name = "readme.pdf"
        events_kml = "event.kml"
        utils.generate_trial_structure(destination, current_trial)
        shttp.post_full_trial(shttp.experiment_id_dic[expnum], trialtitle, description, current_trial)

        # Find and move every file within an event to the created file structure.
        move_datafiles(file_data_dicts, event, destination, current_trial, trial_doc_folder, default_folder, expnum)
        utils.move_files(report_source, trial_doc_folder, [report_name, readme_name, events_kml])
        snupload.upload_reportfile(expnum, current_trial, trial_doc_folder, report_name)
        snupload.upload_reportfile(expnum, current_trial, trial_doc_folder, readme_name)
        snupload.upload_reportfile(expnum, current_trial, trial_doc_folder, events_kml)
        utils.clean_up(report_source)

        # Move on to the next trial for further processing after updating the cache.
        nees_logging.log_goto_nextline(neeshub_log_filename)
        caching.update_all_cache_dictionaries(expnum, current_trial, site_event_id, site_event_ml, site_event_dist)
        current_trial += 1
Example #5
def get_fetched_data_cell_lines_mapping(fetched_cell_lines,
                                        input_cell_lines):
    mapping = dict()
    for input_cell_line in input_cell_lines:
        cleaned_input_cell_line = clean_up(input_cell_line)
        max_similarity = 0
        for fetched_cell_line in fetched_cell_lines:
            current_similarity = similar(clean_up(fetched_cell_line),
                                         cleaned_input_cell_line)
            if current_similarity > max_similarity:
                mapping[input_cell_line] = fetched_cell_line
                max_similarity = current_similarity
    return mapping
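In Example #5, clean_up and similar work on strings rather than files: clean_up presumably normalizes a cell-line name and similar returns a similarity ratio between two names. A minimal sketch under those assumptions:

import re
from difflib import SequenceMatcher

def clean_up(name):
    # Assumption: normalize a cell-line name by lowercasing it and stripping
    # everything that is not a letter or digit, so "MCF-7" and "mcf7" compare equal.
    return re.sub(r'[^a-z0-9]', '', name.lower())

def similar(a, b):
    # Similarity ratio in [0, 1] between two cleaned names.
    return SequenceMatcher(None, a, b).ratio()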
Example #6
def read_files():
    # To preprocess once, uncomment get_samples_clades().
    #get_samples_clades()
    forward_dict = utils.read_json(PATH_F_DICT)
    rev_dict = utils.read_json(PATH_R_DICT)
    encoder = None
    decoder = None
    if pretrained_model is False:
        print("Cleaning up stale folders...")
        utils.clean_up(stale_folders)
        clades_in_clades_out = utils.read_json(PATH_TRAINING_CLADES)
        print(clades_in_clades_out)
        print("Preprocessing sample-clade assignment file...")
        dataf = pd.read_csv(PATH_SAMPLES_CLADES, sep=",")
        filtered_dataf = preprocess_sequences.filter_samples_clades(dataf)

        unrelated_clades = utils.read_json(PATH_UNRELATED_CLADES)
        print("Generating cross product of real parent child...")
        preprocess_sequences.make_cross_product(clades_in_clades_out,
                                                filtered_dataf,
                                                len_aa_subseq,
                                                start_token,
                                                parent_collection_start_month,
                                                train_size=test_train_size,
                                                edit_threshold=max_l_dist,
                                                random_size=random_clade_size,
                                                unrelated=False)
        #print("Generating cross product of real sequences but not parent-child...")
        #preprocess_sequences.make_cross_product(unrelated_clades, filtered_dataf, len_aa_subseq, start_token, train_size=1.0, edit_threshold=max_l_dist, random_size=random_clade_size, unrelated=True)
        #sys.exit()
    else:
        if retrain_pretrain_start_index == 0:
            encoder = tf.keras.models.load_model(PRETRAIN_GEN_ENC_MODEL)
            decoder = tf.keras.models.load_model(PRETRAIN_GEN_DEC_MODEL)
        else:
            print("retrain_pretrain_start_index", retrain_pretrain_start_index)
            enc_path = "data/generated_files/pre_train/" + str(
                retrain_pretrain_start_index) + "/enc"
            dec_path = "data/generated_files/pre_train/" + str(
                retrain_pretrain_start_index) + "/dec"
            encoder = tf.keras.models.load_model(enc_path)
            decoder = tf.keras.models.load_model(dec_path)

    start_training(forward_dict, rev_dict, encoder, decoder)
Example #7
def set_globals(arg):
    global args
    global dist_text
    global dist_label
    global output_path
    global combine_type
    global class_no
    global fine_tuned
    global feature_path
    global file_features_path

    args = arg
    dist_text = os.path.join(args.processed_data_path, 'text')
    dist_label = os.path.join(args.processed_data_path, 'lab/')
    output_path = os.path.join(args.processed_data_path,
                               'experiments/outputs/')

    class_no = 3
    combine_type = args.combine_type

    if args.fine_tuning:
        fine_tuned = 'tuned'
    else:
        fine_tuned = ''

    if args.model_name == 'wietsedv/bert-base-dutch-cased':
        feature_path = os.path.join('../features/DiFE/' + combine_type + '_' +
                                    fine_tuned)
    elif args.model_name == 'wietsedv/bert-base-dutch-cased-finetuned-sentiment':
        feature_path = os.path.join('../features/DiFE/' + combine_type +
                                    '_sent' + fine_tuned)
    else:
        print(args.model_name)
        exit()
    if feature_path[-1] == '_':
        feature_path = feature_path[:-1]

    file_features_path = os.path.join(feature_path, 'file_level')
    if os.path.exists(feature_path):
        clean_up(feature_path)
    if os.path.exists(file_features_path):
        clean_up(file_features_path)
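Example #7 calls clean_up(path) on feature directories that already exist, presumably to empty them before new features are written. A hedged sketch of such a helper:

import os
import shutil

def clean_up(path):
    # Assumption: wipe the existing directory and recreate it empty so that
    # freshly extracted features do not mix with stale ones.
    shutil.rmtree(path, ignore_errors=True)
    os.makedirs(path, exist_ok=True)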
Example #8
    def _qsub_generate_and_store_realizations(self, pparams, param_set_ids, seed_list, pchunks, divid=None,
                                              progress_bar=False):
        counter = 0
        random_string = str(uuid.uuid4())

        base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "realizations_" + random_string)

        job_name_prefix = "ps_job_" + random_string[:8] + "_"
        dirs = []
        containers = []
        job_param_ids = {}

        if not os.path.exists(base_dir):
            os.makedirs(base_dir)

        if self.storage_mode != constants.local_storage:
            self.log.write_log("Storage mode must be local while using qsub.", logging.ERROR)
            raise MolnsUtilException("Storage mode must be local while using qsub.")

        for pndx, pset, seed, pchunk in zip(param_set_ids, pparams, seed_list, pchunks):
            if self.cluster_execution is True:
                unpickled_list = dict(pchunk=pchunk, seed=seed,
                                      pickled_cluster_input_file=self.pickled_cluster_input_file,
                                      pset=pset, pndx=pndx, storage_mode=constants.local_storage)
            else:
                unpickled_list = dict(pchunk=pchunk, seed=seed,
                                      model_class=self.model_class, pset=pset, pndx=pndx,
                                      storage_mode=constants.local_storage)

            job_name = job_name_prefix + str(counter)

            # create temp directory for this job.
            temp_job_directory = os.path.join(base_dir, job_name + "/")
            if not os.path.exists(temp_job_directory):
                os.makedirs(temp_job_directory)

            self._submit_qsub_job(constants.run_ensemble_job_file, job_name, unpickled_list, containers, dirs,
                                  temp_job_directory)

            job_param_ids[pndx] = temp_job_directory
            counter += 1

        keep_dirs = self._wait_for_all_results_to_return(wait_for_dirs=dirs, divid=divid, progress_bar=progress_bar)

        remove_dirs = [directory for directory in dirs if directory not in keep_dirs]
        for directory in remove_dirs:
            unpickled_result = get_unpickled_result(directory)
            r = unpickled_result['filenames']
            param_set_id = unpickled_result['param_set_id']
            if param_set_id not in self.result_list:
                self.result_list[param_set_id] = []
            self.result_list[param_set_id].extend(r)

        self.log.write_log("Cleaning up. Job directory: {0}".format(base_dir))

        # Arrange for generated files to be available in a known location - base_dir.
        DistributedEnsemble.__post_process_generated_ensemble(remove_dirs, base_dir)

        # Delete job containers and directories. Preserve base_dir as it contains computed realizations.
        clean_up(dirs_to_delete=remove_dirs, containers_to_delete=containers)

        return jsonify(realizations_directory=base_dir, result_list=self.result_list)
Example #9
    def _qsub_run_ensemble_map_aggregate(self, **kwargs):
        counter = 0
        random_string = str(uuid.uuid4())
        base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp_" + random_string)
        job_name_prefix = "ps_job_" + random_string[:8] + "_"
        dirs = []
        containers = []
        number_of_trajectories = kwargs['number_of_trajectories']
        chunk_size = kwargs['chunk_size']

        self.log.write_log("Generating {0} realizations of the model, running mapper & aggregator (chunk size={1})"
                           .format(number_of_trajectories, chunk_size))

        if self.cluster_execution is False:
            if kwargs['aggregator'] is None:
                aggregator = builtin_aggregator_list_append
            else:
                aggregator = kwargs['aggregator']

        if not os.path.exists(base_dir):
            os.makedirs(base_dir)

        num_chunks = int(math.ceil(number_of_trajectories / float(chunk_size)))
        seed_list = self._get_seed_list(len(self.parameters), number_of_trajectories, chunk_size)
        pparams = []
        param_set_ids = []
        self._set_pparams_paramsetids_presultlist(num_chunks=num_chunks, pparams=pparams, param_set_ids=param_set_ids)

        for pndx, pset, seed in zip(param_set_ids, pparams, seed_list):
            if self.cluster_execution is False:
                unpickled_list = dict(chunk_size=chunk_size, seed=seed, model_cls=self.model_class,
                                      mapper=kwargs['mapper'], aggregator=aggregator, pset=pset, pndx=pndx)
            else:
                unpickled_list = dict(chunk_size=chunk_size, seed=seed,
                                      pickled_cluster_input_file=kwargs['pickled_cluster_input_file'], pset=pset,
                                      pndx=pndx)

            job_name = job_name_prefix + str(counter)

            # create temp directory for this job.
            temp_job_directory = os.path.join(base_dir, job_name + "/")
            if not os.path.exists(temp_job_directory):
                os.makedirs(temp_job_directory)

            self._submit_qsub_job(constants.run_ensemble_map_and_aggregate_job_file, job_name, unpickled_list,
                                  containers,
                                  dirs, temp_job_directory)

            counter += 1

        keep_dirs = self._wait_for_all_results_to_return(wait_for_dirs=dirs,
                                                         progress_bar=kwargs.get('progress_bar', False),
                                                         divid=kwargs.get('divid', None))

        # Process only the results successfully computed into a format compatible with self.run_reducer.
        remove_dirs = [directory for directory in dirs if directory not in keep_dirs]
        mapped_results = {}
        self._set_qsub_mapped_results(remove_dirs, mapped_results)

        self.log.write_log("Cleaning up. Job directory: {0}".format(base_dir))

        # remove temporary files and finished containers. Keep all files that record errors.
        dirs_to_delete = remove_dirs
        if len(keep_dirs) == 0:
            dirs_to_delete = [base_dir]

        clean_up(dirs_to_delete=dirs_to_delete, containers_to_delete=containers)

        return mapped_results
Example #10
    def qsub_map_aggregate_stored_realizations(self, **kwargs):
        realizations_storage_directory = kwargs['realizations_storage_directory']
        self.result_list = kwargs.get("result_list", self.result_list)
        number_of_trajectories = self.number_of_trajectories if self.number_of_trajectories != 0 \
            else len(self.result_list)
        chunk_size = kwargs.get('chunk_size', self._determine_chunk_size(number_of_trajectories))

        if self.parameters is None:
            raise MolnsUtilException("self.parameters is None. I don't know how to proceed.")

        self.log.write_log("Running mapper & aggregator on the result objects (number of results={0}, chunk size={1})"
                           .format(number_of_trajectories * len(self.parameters), chunk_size))

        counter = 0
        random_string = str(uuid.uuid4())
        if not os.path.isdir(realizations_storage_directory):
            self.log.write_log("Directory {0} does not exist.".format(realizations_storage_directory), logging.ERROR)
            raise MolnsUtilException("Directory {0} does not exist.".format(realizations_storage_directory))

        base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp_" + random_string)
        job_name_prefix = "ps_job_" + random_string[:8] + "_"
        dirs = []
        containers = []

        # chunks per parameter TODO is number_of_trajectories correct here?
        self.log.write_log("Number of trajectories: {0}".format(number_of_trajectories))
        num_chunks = int(math.ceil(number_of_trajectories / float(chunk_size)))
        chunks = [chunk_size] * (num_chunks - 1)
        chunks.append(number_of_trajectories - chunk_size * (num_chunks - 1))
        # total chunks
        pparams = []
        param_set_ids = []
        presult_list = []
        self._set_pparams_paramsetids_presultlist(num_chunks, pparams, param_set_ids, presult_list, chunk_size)

        for result, pndx in zip(presult_list, param_set_ids):
            # create temp directory for this job.
            job_name = job_name_prefix + str(counter)
            temp_job_directory = os.path.join(base_dir, job_name + "/")
            if not os.path.exists(temp_job_directory):
                os.makedirs(temp_job_directory)

            # copy pre-computed realizations to working directory.
            import shutil
            for i, filename in enumerate(result):
                shutil.copyfile(os.path.join(realizations_storage_directory, filename),
                                os.path.join(temp_job_directory, filename))

            if self.cluster_execution is False:
                unpickled_list = dict(result=result, pndx=pndx, mapper=kwargs['mapper'],
                                      aggregator=kwargs['aggregator'], cache_results=False)
            else:
                unpickled_list = dict(result=result, pndx=pndx, cache_results=False,
                                      pickled_cluster_input_file=kwargs['pickled_cluster_input_file'])

            self._submit_qsub_job(constants.map_and_aggregate_job_file, job_name, unpickled_list, containers, dirs,
                                  temp_job_directory)

            counter += 1

        keep_dirs = self._wait_for_all_results_to_return(wait_for_dirs=dirs, divid=kwargs.get('divid', False))

        remove_dirs = [directory for directory in dirs if directory not in keep_dirs]
        mapped_results = {}
        self._set_qsub_mapped_results(remove_dirs, mapped_results)

        self.log.write_log("Cleaning up job directory {0}".format(base_dir))

        # remove temporary files and finished containers. Keep all files that record errors.
        dirs_to_delete = remove_dirs
        if len(keep_dirs) == 0:
            dirs_to_delete = [base_dir]

        clean_up(dirs_to_delete=dirs_to_delete, containers_to_delete=containers)

        return mapped_results
Example #11
            # high enough
            if motionCounter >= int(conf["min_motion_frames"]):

                with open('conf.json') as json_file:
                    conf2 = json.load(json_file)
                    #ftp count
                    ftpCount = 0
                # send an email if enabled
                if (conf2["device_mode_email"] == 1):

                    print("[INFO] Sending an alert email!!!")
                    send_email(conf)
                    if ((conf2["device_mode_pic"]) == 1):
                        ftp_up()
                        ftpCount = 1
                        clean_up()
                    else:
                        clean_up()

                if conf2['device_mode_pic'] == 1 and ftpCount == 0:
                    print("[ftping]....")
                    ftp_up()
                    clean_up()

                    print("[INFO] waiting {} seconds...".format(
                        conf["camera_warmup_time"]))
                    time.sleep(conf["camera_warmup_time"])
                    print("[INFO] running")

                    print(conf2['device_mode_pic'])
                    print(ftpCount)
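In Example #11, ftp_up() uploads the captured frames and clean_up() then removes them locally so the capture folder does not fill up. Assuming the frames are JPEG files in a local directory, a minimal sketch could be:

import glob
import os

def clean_up(image_dir="images"):
    # Hypothetical: delete locally captured frames once they have been
    # emailed or uploaded via FTP.
    for image in glob.glob(os.path.join(image_dir, "*.jpg")):
        os.remove(image)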
Example #12
def html_to_json(url):
    category, uid = tokenize(url)
    schema_name = 'schema/{}.json'.format(category)
    with open(schema_name, 'rb') as fp:
        template = json.load(fp)
    html_doc = get_html(url)
    soup = BeautifulSoup(html_doc, 'html.parser')

    table_title = None
    result = {}
    ignore_image = True
    for tr in soup.find_all('tr'):
        # keep only the bottom-most (leaf-level) tr rows
        if tr.find_all('tr'):
            continue
        is_title_row = False
        row_content = []
        for td in tr.find_all('td'):
            if ignore_image and td.find_all('img'):
                continue
            text = clean_up(td.text)
            if text in template:
                table_title = text
                is_title_row = True
                row_titles = template[table_title]
                ignore_image = row_titles['ignore image']
                result[table_title] = {}
                break
            link = ''
            for a in td.find_all('a'):
                link = a.get('href')
            row_content.append({'text': text, 'link': link})

        if is_title_row:
            continue

        if not row_content or not table_title:
            continue

        column_index = row_titles['column index']
        strict_match = row_titles['strict match']
        regex_match = row_titles['regex match']
        terminate_on_mismatch = row_titles['terminate on mismatch']

        matched = False
        if len(row_content) > column_index + 1:
            candidate_row_title = row_content[column_index]['text']
            for s in strict_match:
                if s == candidate_row_title and s not in result[table_title]:
                    matched = True
                    result[table_title][s] = row_content[column_index + 1:]
                    break
            if not matched:
                for s in regex_match:
                    if s in candidate_row_title:
                        matched = True
                        result[table_title][u'Certified Votes'] = row_content[column_index + 1:]
                        break
                    if re.match(s, candidate_row_title):
                        matched = True
                        category, race_id = tokenize(row_content[column_index + 1]['link'])
                        result[table_title][race_id] = row_content[column_index:]
                        break
        if terminate_on_mismatch and not matched:
            table_title = None
            ignore_image = True
    return result
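Example #12 drives its parsing from a per-category schema file (schema/{category}.json). Only the rule keys are visible in the code above; the entry below is a hypothetical illustration, written as the equivalent Python dict, of what one table's rules might contain:

# Hypothetical schema entry; the table title and match strings are made up,
# only the rule keys come from the code above.
template = {
    "General Election": {
        "ignore image": True,
        "column index": 0,
        "strict match": ["Turnout", "Registered Voters"],
        "regex match": ["Certified"],
        "terminate on mismatch": False,
    }
}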
Example #13
def main(argv):
    arg_parser = ArgParser()
    args = arg_parser.parse(argv)
    game_hash = args.game_hash

    # Set up logging
    formatter = logging.Formatter('%(asctime)s %(message)s')
    logging.basicConfig(filename=os.path.join(args.log_path,
                        'game_{}_vms.log'.format(game_hash)),
                        level=logging.DEBUG, format='%(asctime)s %(message)s',
                        datefmt="%H:%M:%S", filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    try:
        game = args.game
        logging.debug("Game description JSON: {}".format(game))
        status(game_hash, "Started creating VMs", args.remote)

        assert re.match(r'[a-zA-Z0-9]+\Z', game_hash)
        status(game_hash, "PENDING", args.remote)

        game_name = game['name']
        assert re.match(r'[a-zA-Z0-9 _-]+\Z', game_name)
        teams = game['teams']
        services = [s['service_name'] for s in game['services']]
        sudo = game.get('sudo', False)

        logging.info("Game name: {}".format(game_name))
        logging.info("Teams: {}".format(teams))
        logging.info("Services: {}".format(services))
        assert game['num_services'] == len(game['services'])
        assert game['num_services'] == len(services)
        # Avoid an IP conflict with the organization VM (10.7.254.10)
        assert len(teams) < 200

        # Clean up previous creations
        clean_up(args.output_path, game_hash, teams, bundle=True)
        game_dir = gamepath(args.output_path, game_hash)
        root_key_path = os.path.join(game_dir, "root_key")
        root_public_key = create_ssh_key(root_key_path)

        create_org(args.output_path, game_hash, game_name, teams, services,
                   root_key_path, args.remote)

        for team_id, team in enumerate(teams, start=1):
            team_public_key = create_ssh_key("{}/team{}_key".format(game_dir,
                                                                    team_id))
            create_team(args.output_path, game_hash, team_id, root_public_key,
                        team_public_key, team['password'], sudo, services,
                        args.remote)
        bundle(game_hash, "Organization", "root_key", "organization",
               args.output_path, args.remote)
        for team_id, team in enumerate(teams, start=1):
            team_name = team['name']
            bundle(game_hash, "Team{}".format(team_id),
                   "team{}_key".format(team_id), team_name, args.output_path,
                   args.remote)

        status(game_hash, "Cleaning up the build")
        clean_up(args.output_path, game_hash, teams)

        status(game_hash, "READY")

    except Exception:
        status(game_hash, "An error occurred. Contact us and report game "
                          "{}".format(game_hash))
        status(game_hash, "ERROR")
        logging.exception("Exception")
        os.system("echo 'Creation for {} failed, see the log in /tmp' | "
                  "mail -s 'Error creating game {}' "
                  "root".format(game_hash, game_hash))
Example #14
                        f.write(part + " " + key + " " +
                                str(self.classification[part][key]) + "\n")


def convert(file, out):
    """
    I used this to convert the truth file for emails I found on the internet, where the SPAM or HAM label came before the file name
    :param file:
    :param out:
    """
    dic = {}
    with open(file, 'r', encoding="utf-8") as f:
        for line in f.readlines():
            key, val = line.split()
            dic[val] = key
    with open(out, 'w', encoding="utf-8") as f:
        for key in dic:
            f.write(key + " " + dic[key] + "\n")


if __name__ == "__main__":
    # used for testing and debugging
    filter = MyFilter()
    filter.test("SPAM-data/2/")
    confusion_matrix = BinaryConfusionMatrix('SPAM', 'OK')
    confusion_matrix.compute_from_dicts(
        utils.read_classification_from_file("SPAM-data/2/"),
        filter.predictions)
    print("Quality: %.2f%%" % (confusion_matrix.quality_score() * 100))
    utils.clean_up("SPAM-data/2/")  # clean !truth
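The convert helper in Example #14 swaps a 'label filename' truth file into the 'filename label' format the filter expects. A small usage sketch (the file names and labels are illustrative):

# Given a file 'truth_original.txt' with lines such as
#   SPAM 0001.eml
#   OK   0002.eml
# this writes '!truth' with the columns swapped:
#   0001.eml SPAM
#   0002.eml OK
convert("truth_original.txt", "!truth")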
Example #15
def main(argv):
    arg_parser = ArgParser()
    args = arg_parser.parse(argv)
    game_hash = args.game_hash

    # Set up logging
    formatter = logging.Formatter('%(asctime)s %(message)s')
    logging.basicConfig(filename=os.path.join(
        args.log_path, 'game_{}_vms.log'.format(game_hash)),
                        level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt="%H:%M:%S",
                        filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    try:
        game = args.game
        logging.debug("Game description JSON: {}".format(game))
        status(game_hash, "Started creating VMs", args.remote)

        assert re.match(r'[a-zA-Z0-9]+\Z', game_hash)
        status(game_hash, "PENDING", args.remote)

        game_name = game['name']
        assert re.match(r'[a-zA-Z0-9 _-]+\Z', game_name)
        teams = game['teams']
        services = [s['service_name'] for s in game['services']]
        sudo = game.get('sudo', False)

        logging.info("Game name: {}".format(game_name))
        logging.info("Teams: {}".format(teams))
        logging.info("Services: {}".format(services))
        assert game['num_services'] == len(game['services'])
        assert game['num_services'] == len(services)
        # Avoid an IP conflict with the organization VM (10.7.254.10)
        assert len(teams) < 200

        # Clean up previous creations
        clean_up(args.output_path, game_hash, teams, bundle=True)
        game_dir = gamepath(args.output_path, game_hash)
        root_key_path = os.path.join(game_dir, "root_key")
        root_public_key = create_ssh_key(root_key_path)

        create_org(args.output_path, game_hash, game_name, teams, services,
                   root_key_path, args.remote)

        for team_id, team in enumerate(teams, start=1):
            team_public_key = create_ssh_key("{}/team{}_key".format(
                game_dir, team_id))
            create_team(args.output_path, game_hash, team_id, root_public_key,
                        team_public_key, team['password'], sudo, services,
                        args.remote)
        bundle(game_hash, "Organization", "root_key", "organization",
               args.output_path, args.remote)
        for team_id, team in enumerate(teams, start=1):
            team_name = team['name']
            bundle(game_hash, "Team{}".format(team_id),
                   "team{}_key".format(team_id), team_name, args.output_path,
                   args.remote)

        status(game_hash, "Cleaning up the build")
        clean_up(args.output_path, game_hash, teams)

        status(game_hash, "READY")

    except Exception:
        status(
            game_hash, "An error occurred. Contact us and report game "
            "{}".format(game_hash))
        status(game_hash, "ERROR")
        logging.exception("Exception")
        os.system("echo 'Creation for {} failed, see the log in /tmp' | "
                  "mail -s 'Error creating game {}' "
                  "root".format(game_hash, game_hash))