def load_details(conf, proc_details):
    '''
        Load the JSON specifications for the configuration and
        proc_details parameters.
    '''
    DB_CONFIG.update(load_json_file(conf))
    PROC_DETAILS.extend(load_json_file(proc_details))
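Every snippet on this page leans on a small load_json_file helper defined elsewhere in each project. Signatures vary between the quoted projects (some take a directory and a filename, one passes a default for missing files), so the following is only a minimal sketch of the common single-path case:

import json

def load_json_file(path, encoding='utf-8'):
    # Minimal sketch of the helper assumed throughout these examples:
    # open a file and parse its contents as JSON.
    with open(path, encoding=encoding) as fh:
        return json.load(fh)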
Example #2
def get(city=None, taxi_id=None):
    if city and taxi_id:
        filedata = utils.load_json_file('taxis.json')["data"]
        results = [taxi for taxi in filedata if (taxi['city'] == city and taxi['name'] == taxi_id)]
        number_results = len(results)
        response = {
            "meta": {
                "count": number_results,
                "links": {
                    "self": "https://mock-travel-apis.herokuapp.com/taxis/" + city + "/" + taxi_id
                },
            },
            "data": results
        }
        return json.dumps(response)
    elif city:
        filedata = utils.load_json_file('taxis.json')["data"]
        results = [taxi for taxi in filedata if taxi['city'] == city]
        number_results = len(results)
        response = {
            "meta": {
                "count": number_results,
                "links": {
                    "self": "https://mock-travel-apis.herokuapp.com/taxis/" + city
                },
            },
            "data": results
        }
        return json.dumps(response)
    else:
        return utils.load_json_file('taxis.json')
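A quick, purely hypothetical call illustrates the JSON:API-style envelope the function builds ('berlin' and 'taxi-42' are made-up values):

import json

payload = json.loads(get(city='berlin', taxi_id='taxi-42'))
print(payload['meta']['count'])          # number of matching taxis
print(payload['meta']['links']['self'])  # self link echoing both filters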
Example #3
def kegg_map_coloring(name_prefix, color_rev=False):
    deg_json = {}
    map2ko = {}
    ko2color = {}

    # make sub folder for map images
    if not os.path.exists(name_prefix + '_kegg_maps'):
        os.makedirs(name_prefix + '_kegg_maps')

    try:
        deg_json = utils.load_json_file(name_prefix + '_represent_gene.cache')
    except FileNotFoundError:
        try:
            deg_json = utils.load_json_file(name_prefix +
                                            '_represent_isoform.cache')
        except FileNotFoundError:
            logger.warning('represent_gene or _isoform cache not found.')

    if deg_json:
        # build map2ko dict
        for deg in deg_json.values():
            if deg.get('keggko'):
                # workaround: normalize a scalar keggko entry to a list
                if not isinstance(deg['keggko'], list):
                    deg['keggko'] = [deg['keggko']]
                for keggko in deg['keggko']:
                    ko = keggko.split(':')[1]
                    for kmap in deg['keggmap']:
                        if kmap not in map2ko:
                            map2ko[kmap] = [ko]
                        else:
                            if ko not in map2ko[kmap]:
                                map2ko[kmap].append(ko)
        # build ko2color dict
        for deg in deg_json.values():
            if deg.get('keggko'):
                for keggko in deg['keggko']:
                    ko = keggko.split(':')[1]
                    if ko not in ko2color:
                        ko2color[ko] = [deg['hits'], deg['logFC']]
                    else:
                        if deg['hits'] > ko2color[ko][0]:
                            ko2color[ko] = [deg['hits'], deg['logFC']]
        for ko, fcs in ko2color.items():
            if color_rev:
                fc = -fcs[1]
            else:
                fc = fcs[1]
            ko2color[ko] = de_color_mapping(fc)

        # build kegg map request string
        for kmap, kos in map2ko.items():
            req = kmap
            for ko in kos:
                req += '/' + ko + '%09' + ko2color[ko]
            kegg_weblink_pathway(name_prefix + '_kegg_maps', req)
        return True
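The request string assembled above follows KEGG's pathway-coloring convention: a map identifier followed by /<object>%09<color> segments, where %09 is a URL-encoded tab separating an object id from its color. Assuming kegg_weblink_pathway simply appends the string to KEGG's show_pathway endpoint (the helper itself is not shown), the final link would look roughly like:

# Hypothetical illustration; map id, KO ids, and colors are made up.
req = 'map00010/K00844%09%23ff0000/K12407%09%230000ff'
url = 'https://www.kegg.jp/kegg-bin/show_pathway?' + req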
Example #4
def test_handler():
    lex_handler = LexBotHandler()
    test_context = None
    test_event = load_json_file(os.path.join(TEST_DATA_DIR, 'test_med_time.json'))

    # test registering new intent handlers
    expected_response = '1234'
    lex_handler.register_intent('MedicationTime', lambda event: expected_response)
    assert lex_handler.handle_lambda(test_event, test_context) == expected_response

    # test when intent is not supported, gracefully respond to user
    not_supported_test_event = load_json_file(os.path.join(TEST_DATA_DIR, 'not_supported_intent.json'))
    response = lex_handler.handle_lambda(not_supported_test_event, test_context)
    assert 'Sorry' in response['dialogAction']['message']['content']
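LexBotHandler itself is not part of this listing. A minimal sketch consistent with what the test exercises (one callable registered per intent name, a graceful fallback for unknown intents, and a Lex V1-style event carrying currentIntent.name) could be:

class LexBotHandler:
    # Hypothetical sketch; the real class is not shown in this listing.
    def __init__(self):
        self._intents = {}

    def register_intent(self, name, handler):
        self._intents[name] = handler

    def handle_lambda(self, event, context):
        intent = event['currentIntent']['name']
        if intent in self._intents:
            return self._intents[intent](event)
        # Unsupported intent: respond gracefully instead of raising.
        return {'dialogAction': {
            'message': {'content': "Sorry, I can't help with that yet."}}}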
Example #5
def run(load=False):
    """
    Entry point. This function can also be called by external services.
    Connects to veganistan.se, fetches all important data, and dumps it
    into a date-stamped JSON file.
    Returns the filename of the created file.
    """
    created_file = None
    if load:
        data = load_json_file("json", "20140725_0940.json")
        entry_manager = EntryManager(data_dict=data)
    else:
        # start scraping the base data for all entries.
        entry_manager = scrape_base_info()

        created_file = serialize_and_save(
            entries=entry_manager.get_entries(),
            filename='json/%s.json' % datetime.now().strftime("%Y%m%d_%H%M")
        )

    for entry in entry_manager.get_entries():
        scrape_detail(entry)

    if created_file:
        return serialize_and_save(entry_manager.get_entries(), created_file)
    return None
Example #6
def fastp_parser(task):
    fastp_json_path = task.path.joinpath(task.id, 'reads', 'fastp.json')
    fastp_dict = utils.load_json_file(fastp_json_path)
    before = fastp_dict['summary']['before_filtering']
    after = fastp_dict['summary']['after_filtering']
    before_total_reads = before['total_reads']
    before_total_bases = before['total_bases']
    before_total_q30 = before['q30_rate']
    before_r1_length = before['read1_mean_length']
    before_r2_length = before['read2_mean_length']
    after_total_reads = after['total_reads']
    after_total_bases = after['total_bases']
    after_total_q30 = after['q30_rate']
    after_r1_length = after['read1_mean_length']
    after_r2_length = after['read2_mean_length']
    duplication_rate = fastp_dict['duplication']['rate']
    fastp_abs = {
        'before_total_reads': before_total_reads,
        'before_total_bases': before_total_bases,
        'before_total_q30': before_total_q30,
        'before_r1_length': before_r1_length,
        'before_r2_length': before_r2_length,
        'after_total_reads': after_total_reads,
        'after_total_bases': after_total_bases,
        'after_total_q30': after_total_q30,
        'after_r1_length': after_r1_length,
        'after_r2_length': after_r2_length,
        'duplication_rate': duplication_rate
    }
    return fastp_abs
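One caveat: fastp only reports read2_mean_length for paired-end runs, so the lookups above raise KeyError on a single-end report. A slightly defensive variant (an assumption, not part of the original) reads those keys with a default:

# Hedged variant: single-end fastp reports omit read2_mean_length.
before_r2_length = before.get('read2_mean_length', 0)
after_r2_length = after.get('read2_mean_length', 0)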
Example #7
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH,
                                       BREAKING_LIBRARY_VERSIONS_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(
        FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH,
                                                    FILE_NAME_SEARCH_STRING)
    print("Found {} files".format(len(files_to_parse)))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}: Parsing + writing {}".format(count, ftp))
            file_contents = utils.load_json_file(ftp)
            lines_to_write = list()
            package_name = file_contents['name']
            for v in file_contents['versions']:
                lines_to_write.append({
                    'package_name': package_name,
                    "version": v['number'],
                    "version_published_at": v['published_at']
                })
            utils.write_lines_to_existing_csv(
                OUTPUT_FILE_PATH, BREAKING_LIBRARY_VERSIONS_FIELD_NAMES,
                lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with next file.".format(e, ftp))
    print("DONE")
Example #8
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, COMMENTS_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    total = len(files_to_parse)
    print("Found {} files".format(total))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}/{}: Parsing + writing {}".format(count, total, ftp))
            comments = utils.load_json_file(ftp)
            issue_id, repo_name = parse_issue_id_and_repo_name_from_file_name(ftp)
            lines_to_write = list()
            for c in comments:
                lines_to_write.append({
                    'id': c['id'],
                    'issue_id': issue_id,
                    'repo_name': repo_name,
                    'url': c['url'],
                    'issue_url': c['issue_url'],
                    'user_id': c['user']['id'],
                    'user_login': c['user']['login'],
                    'user_type': c['user']['type'],
                    'created_at': c['created_at'],
                    'updated_at': c['updated_at'],
                    'body': c['body'],
                })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, COMMENTS_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with next file.".format(e, ftp))
    print("DONE")
Example #9
def run(load=False):
    """
    Entry point. This function can also be called by external services.
    Connects to veganistan.se, fetches all important data, and dumps it
    into a date-stamped JSON file.
    Returns the filename of the created file.
    """
    created_file = None
    if load:
        data = load_json_file("json", "20140725_0940.json")
        entry_manager = EntryManager(data_dict=data)
    else:
        # start scraping the base data for all entries.
        entry_manager = scrape_base_info()

        created_file = serialize_and_save(
            entries=entry_manager.get_entries(),
            filename='json/%s.json' % datetime.now().strftime("%Y%m%d_%H%M"))

    for entry in entry_manager.get_entries():
        scrape_detail(entry)

    if created_file:
        return serialize_and_save(entry_manager.get_entries(), created_file)
    return None
Example #10
def verify_and_test_db(main_file, host_id, run_params):
    # Parse main query file.
    parse_data = load_json_file(main_file)
    query_data = load_json_file('fi-framework/' + parse_data['query_file'])
    queries = query_data['queries']

    db_type = parse_data['db_type']
    db_init = parse_data['db_meta']

    localhost = '127.0.0.1'
    run_type = run_params['type']
    db_session = None
    if db_type == 'cassandra' and run_type in ['verify', 'test', 'query']:
        # Do not always delete the keyspace when the reuse_keyspace param is
        # set to False. For example, when querying you probably do not want
        # to remove the data each time.
        force_reuse_keyspace = False
        if parse_data.get('insert_data') is True and run_type != 'verify':
            force_reuse_keyspace = True
        db_session = create_dbsession_from_type(db_type, db_init, host=localhost,
                                                force_reuse_keyspace=force_reuse_keyspace)

    if run_type == 'verify':  # Initialize and fill the verification database.
        _insert_and_verify_cmd(parse_data, db_session, queries)
        db_session.shutdown()
    elif run_type == 'test':  # Run the test queries and verification queries.
        _test_cmd(db_session, queries, run_params, db_type)
        db_session.shutdown()
    elif run_type == 'retrieve_targets':  # Retrieve DBMS target files.
        password = ''
        if 'password' in parse_data['server_meta']:
            password = parse_data['server_meta']['password']
            if not (isinstance(password, unicode) or isinstance(password, str)):
                password = password[host_id]
        _retrieve_cmd(run_params, parse_data, password, db_type)
    elif run_type == 'clear_verification_db':  # Clear the verification DBMS.
        print "Deleting verification db."
        verification_db = SQLiteDB()
        verification_db.drop_table()
    elif run_type == 'restore':  # Restore the current db_data directory with the backup tar.
        _restore_cmd(run_params)
    elif run_type == 'query':  # Query the DBSession.
        _query_cmd(db_session, run_params)
        db_session.shutdown()
    else:
        print "Unknown command given: {}".format(run_type)
Example #11
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    total = len(files_to_parse)
    print("Found {} files".format(total))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}/{}: Parsing + writing {}".format(count, total, ftp))
            commit = utils.load_json_file(ftp)
            if 'message' in commit and 'No commit found for SHA' in commit['message']:
                print('No Commit found...continuing')
                continue
            commit_sha, issue_id, repo_name = parse_artifacts_from_file_name(ftp)
            message = commit['commit']['message']
            url = commit['url']
            html_url = commit['html_url']
            author_login = commit['author']['login'] if commit['author'] is not None else ''
            author_type = commit['author']['type'] if commit['author'] is not None else ''
            committer_login = commit['committer']['login'] if commit['committer'] is not None else ''
            committer_type = commit['committer']['type'] if commit['committer'] is not None else ''
            stats_total = commit['stats']['total']
            stats_additions = commit['stats']['additions']
            stats_deletions = commit['stats']['deletions']

            def make_new_commit_line(f):
                return {
                    'commit_sha': commit_sha,
                    'issue_id': issue_id,
                    'repo_name': repo_name,
                    'url': url,
                    'html_url': html_url,
                    'message': message,
                    'author_login': author_login,
                    'author_type': author_type,
                    'committer_login': committer_login,
                    'committer_type': committer_type,
                    'stats_total': stats_total,
                    'stats_additions': stats_additions,
                    'stats_deletions': stats_deletions,
                    'file_name': f['filename'],
                    'file_status': f['status'],
                    'file_additions': f['additions'],
                    'file_deletions': f['deletions'],
                    'file_changes': f['changes'],
                    'file_patch': f['patch'] if 'patch' in f else None,
                }

            lines_to_write = list()
            for file in commit['files']:
                new_line = make_new_commit_line(file)
                lines_to_write.append(new_line)
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with next file.".format(e, ftp))
    print("DONE")
Example #12
def inspect_data(path):
    sample = utils.load_json_file(path)
    key_fq = Counter([i["label"] for i in sample])
    sorted_key_fq = sorted(key_fq.items(), key=lambda x: x[1], reverse=True)
    log_obj.info("样本总数" + str(len(sample)))
    log_obj.info("类别总数" + str(len(key_fq)))
    log_obj.info(key_fq)
    log_obj.info(sorted_key_fq)
    return sorted_key_fq
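On a toy sample the Counter-based tally reduces to a sorted frequency list, for example:

from collections import Counter

# Hypothetical toy input mirroring the {"label": ...} records read above.
sample = [{"label": "sports"}, {"label": "sports"}, {"label": "tech"}]
key_fq = Counter(i["label"] for i in sample)
print(sorted(key_fq.items(), key=lambda x: x[1], reverse=True))
# -> [('sports', 2), ('tech', 1)]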
Example #13
def process(path, train_to_path, dev_to_path, extend_sample_map):
    """
    :param extend_sample_map:  过采样比例
    """
    # 划分训练与验证集
    model0_sample = utils.load_json_file(path)
    sample_dic = {}
    for i in model0_sample:
        if i["label"] in sample_dic:
            sample_dic[i["label"]].append(i)
        else:
            sample_dic[i["label"]] = [i]

    train_examples = []
    test_examples = []
    for l, lis in sample_dic.items():
        a, b = train_test_split(lis, test_size=properties.test_dev_size)
        train_examples.extend(a)
        test_examples.extend(b)
    print("train vs dev=", len(train_examples), len(test_examples))

    # Oversample the training set
    train_dic = {}
    for i in train_examples:
        if i["label"] in train_dic:
            train_dic[i["label"]].append(i)
        else:
            train_dic[i["label"]] = [i]

    for k, v in train_dic.items():
        print(k, len(v))
    for label, ratio in extend_sample_map.items():
        if ratio <= 1:
            tmp_lis = copy.deepcopy(train_dic[label])
            tmp_lis = shuffle_list(tmp_lis)
            train_dic[label] = tmp_lis[:int(len(tmp_lis) * ratio)]
        else:
            tmp_lis = copy.deepcopy(train_dic[label])
            tmp_lis = shuffle_list(tmp_lis)
            for j in range(math.ceil(ratio)):
                train_dic[label].extend(tmp_lis)
            train_dic[label] = train_dic[label][:int(len(tmp_lis) * ratio)]
    print("重新采样后")
    for k, v in train_dic.items():
        print(k, len(v))
    train_examples = []
    for l, lis in train_dic.items():
        train_examples.extend(lis)

    train_examples = shuffle_list(train_examples)
    test_examples = shuffle_list(test_examples)
    log_obj.info("划分 训练集 : 验证集 = %s : %s" %
                 (len(train_examples), len(test_examples)))

    utils.dump_json_file(train_to_path, train_examples)
    utils.dump_json_file(dev_to_path, test_examples)
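Concretely, in the ratio > 1 branch: starting from 4 examples with a ratio of 2.5, math.ceil(2.5) = 3 extra shuffled copies are appended (16 items in total) and the list is then truncated to int(4 * 2.5) = 10 items:

import math

# Worked example of the oversampling arithmetic above: 4 samples, ratio 2.5.
samples = ['a', 'b', 'c', 'd']
ratio = 2.5
extended = list(samples)
for _ in range(math.ceil(ratio)):   # appends 3 full copies -> 16 items
    extended.extend(samples)
print(len(extended[:int(len(samples) * ratio)]))  # -> 10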
Example #14
    def test_authors_list(self):
        """
        Test if the list of top authors is returned correctly or not.
        """

        authors = git.Authors(self.git_index, self.start, self.end)
        authors_list = authors.aggregations()
        authors_test = load_json_file(TOP_AUTHORS)
        assert_array_equal(authors_list['keys'], authors_test['keys'])
        assert_array_equal(authors_list['values'], authors_test['values'])
Example #15
    def test_get_list(self):
        """
        Testing multi valued aggregations.
        """

        self.Query_test_object.until(end=self.end)
        self.Query_test_object.get_terms("author_name")
        authors = self.Query_test_object.get_list()
        authors_test = load_json_file(AUTHORS_LIST)
        self.assertDictEqual(authors, authors_test)
Example #16
    def test_organization_list(self):
        """
        Test if the list of top organizations is returned correctly or not.
        """

        orgs = git.Organizations(self.git_index, self.start, self.end)
        orgs_list = orgs.aggregations()
        orgs_test = load_json_file(TOP_ORGANIZATIONS)
        assert_array_equal(orgs_list['keys'], orgs_test['keys'])
        assert_array_equal(orgs_list['values'], orgs_test['values'])
Example #17
    def test_update_dataset(self):
        data = load_json_file('basic_dataset.json')
        status, dataset = metax.create_dataset(data)
        self.assertIn(status, self.OK, "could not create dataset")

        # data = load_json_file('metax_dataset.json')
        dataset['research_dataset']['title']['en'] = 'title updated'
        status, updated_data = metax.update_dataset(dataset['id'], dataset)
        self.assertIn(status, self.OK, "Metax update failure")
        urn = updated_data["identifier"]
        etsin_status, etsin_data = etsin.view_dataset(urn)
        self.assertIn(etsin_status, self.OK, "Etsin failure")
Example #18
    def test_create_dataset(self):
        # loading the example dataset

        data = load_json_file('basic_dataset.json')
        status, cdata = metax.create_dataset(data)

        self.assertIn(status, self.OK, "could not create dataset")
        urn = cdata["identifier"]
        time.sleep(10)

        etsin_status, etsin_data = etsin.view_dataset(urn)
        self.assertIn(etsin_status, self.OK, "Etsin could not find the dataset")
Example #19
    def test_delete_dataset(self):
        data = load_json_file('basic_dataset.json')

        status, cdata = metax.create_dataset(data)
        self.assertIn(status, self.OK, "could not create dataset")
        urn = cdata["identifier"]

        time.sleep(2)
        status = metax.delete_dataset(cdata['id'])
        self.assertIn(status, self.OK, "Metax dataset delete failure")

        etsin_status, etsin_data = etsin.view_dataset(urn)
        self.assertIn(etsin_status, self.FAIL, "Etsin found the deleted dataset")
Example #20
    def test_fetch_aggregation_results(self):
        """
        Test the fetched aggregation data
        """

        self.Query_test_object.until(end=self.end)\
                              .get_cardinality(self.field1)\
                              .by_authors(field=self.field2)
        response = self.Query_test_object.fetch_aggregation_results()
        aggregations = {"aggregations": response['aggregations']}
        actual_response = load_json_file(FETCH_AGGREGATION_RESULTS_DATA1)

        self.assertDictEqual(aggregations, actual_response)
Example #21
    def test_reject_dataset(self):
        # Create a dataset in metax and reject the dataset for preservation

        # loading the example dataset
        data = load_json_file('basic_dataset.json')

        # creating a dataset
        status, cdata = metax.create_dataset(data)
        self.assertIn(status, self.OK, "Metax create dataset fails")
        dataset_id = cdata['id']

        # rejecting the dataset
        status = pas.reject_dataset(dataset_id)
        self.assertIn(status, self.OK, "PAS dataset rejection fails")
Example #22
    def fresh(self, config_file=None, namespace=None):
        """
        sets the environment with a fresh config or namespace that is not
        the defaults if config_file or namespace parameters are given
        """
        if not config_file:
            config_file = self.config_file()
 
        self.__config_file = config_file
        self.__config = load_json_file(config_file)
        self._commander = self.__config.get('__cmds__')
        self._commander = self.__config.get('__scripts__')
        self.namespace = namespace
        self.wrappers = {}
Example #23
    def test_fetch_results_from_source(self):
        """
        Testing if specific fields can be fetched from index
        """

        self.Query_test_object.until(end=self.end)
        self.Query_test_object.search = self.Query_test_object.search.extra(sort=[
            {
                "commit_date": {
                    "order": "asc"
                }
            }])
        response = self.Query_test_object.fetch_results_from_source(self.field2)
        actual_response = load_json_file(FETCH_SOURCE_RESULTS_DATA1)
        self.assertEqual(response, actual_response['hits'])
Example #24
    def _config(self):
        """
        Lazy load the config so that any errors happen then
        """
        if not self.__config_file:
            # If there is not config file then return an error.
            # TODO: Refactor the config code, it's overly confusing
            raise Exception("""No config found.  Set environment variable LNK_DIR to
                        point to your link configuration directory or create a
                        #.link/link.config file in your HOME directory""")

        if not self.__config:
            self.__config = load_json_file(self.__config_file)

        return self.__config
Example #25
    def test_by_authors(self):
        """
        Test nested aggregation wrt authors
        """

        self.Query_test_object.get_sum(self.field3)\
                              .by_authors(self.field2)\
                              .since(start=self.start)\
                              .until(end=self.end)

        response = self.Query_test_object.fetch_aggregation_results()['aggregations']
        buckets = {"buckets": response['0']['buckets']}

        sum_lines_added = load_json_file(SUM_LINES_ADDED_BY_AUTHORS)
        self.assertEqual(sum_lines_added, buckets)
Example #26
    def _load_confs(self):
        """
        加载配置
        :return: 配置字典 {key:domain,value:config}
        """
        prefix = os.path.split(os.path.abspath(__file__))[0]
        path = os.sep.join([prefix, "configs"])
        files = get_path_files(path)
        config = dict()

        for f in files:
            obj = load_json_file(f)
            domain = obj["domain"]
            config[domain] = obj["conf"]
        return config
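Each file under configs/ is therefore expected to carry at least a domain key and a conf payload. A minimal, purely hypothetical example of such a file:

{
    "domain": "example.com",
    "conf": {"timeout": 30, "retries": 3}
}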
Example #27
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH,
                                          INPUT_CSV_FILE_FIELD_NAMES)
    total = len(repos)
    count = 0
    list_to_write = list()
    for repo in repos:
        repo_name = repo['repo_name']
        try:
            count += 1
            print("\t{}/{} repo={}".format(count, total, repo_name))
            project_git_folder = f"{REPOS_PATH}/repos/{repo_name.replace('/', '#')}"
            if os.path.isdir(project_git_folder):
                package_json_path = f"{project_git_folder}/package.json"
                if utils.file_or_read_file_already_exists(package_json_path):
                    package_json_contents = utils.load_json_file(
                        package_json_path)
                    # Will throw if 'name' is not there
                    list_to_write.append({
                        'repo_name':
                        repo_name,
                        'package_name':
                        package_json_contents['name']
                    })
                else:
                    # package.json doesn't exist
                    list_to_write.append({
                        'repo_name': repo_name,
                        'package_name': None
                    })
            else:
                # Could not clone project
                list_to_write.append({
                    'repo_name': repo_name,
                    'package_name': None
                })
        except Exception as e:
            list_to_write.append({
                'repo_name': repo_name,
                'package_name': None
            })
    utils.write_lines_to_new_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES,
                                 list_to_write)
    print("Done")
Example #28
    def __load_all_jsons(self, targetPath):
        """
        loads all files from directory
        """
        currentDir = os.path.join(os.getcwd(), targetPath)

        files = [
            x for x in os.listdir(currentDir)
            if os.path.isfile(os.path.join(currentDir, x))
        ]

        for file in files:

            if file.endswith('.json'):
                jsonObject = load_json_file(os.path.join(currentDir, file))
                self.add_dialogue_file(jsonObject=jsonObject, fileName=file)
Example #29
    def _config(self):
        """
        Lazy load the config so that any errors happen then
        """
        if not self.__config_file:
            # If there is not config file then return an error.
            # TODO: Refactor the config code, it's overly confusing
            raise Exception(
                """No config found.  Set environment variable LNK_DIR to
                        point to your link configuration directory or create a
                        #.link/link.config file in your HOME directory""")

        if not self.__config:
            self.__config = load_json_file(self.__config_file)

        return self.__config
Example #30
    def test_preserve_dataset(self):
        # Create a dataset in metax and preserve the dataset

        # loading the example dataset
        data = load_json_file('basic_dataset.json')

        # creating a dataset
        status, cdata = metax.create_dataset(data)

        self.assertIn(status, self.OK, "Metax create dataset fails")
        dataset_id = cdata['id']

        # preserving the dataset
        status = pas.preserve_dataset(dataset_id)

        self.assertIn(status, self.OK, "PAS preserve fails")
Example #31
    def test_by_period_with_params(self):
        """
        Test the date_histogram aggregation with all the parameters
        """

        self.Query_test_object.since(start=self.start)\
                              .until(end=self.end)\
                              .get_cardinality(self.field1)\
                              .by_period(field=self.date_field2,
                                         period="quarter",
                                         timezone=self.timezone)

        response = self.Query_test_object.fetch_aggregation_results()['aggregations']
        hash_by_period = load_json_file(NUM_HASHES_BY_QUARTER)
        buckets = {"buckets": response['0']['buckets']}

        self.assertEqual(hash_by_period, buckets)
Example #32
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, ISSUE_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(
        FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH,
                                                    FILE_NAME_SEARCH_STRING)
    print("Found {} files".format(len(files_to_parse)))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}: Parsing + writing {}".format(count, ftp))
            issues = utils.load_json_file(ftp)
            repo_name = parse_repo_name_form_file_name(ftp)
            lines_to_write = list()
            for i in issues:
                lines_to_write.append({
                    'id': i['id'],
                    'repo_name': repo_name,
                    'url': i['url'],
                    'repository_url': i['repository_url'],
                    'comments_url': i['comments_url'],
                    'events_url': i['events_url'],
                    'html_url': i['html_url'],
                    'number': i['number'],
                    'title': i['title'],
                    'user_id': i['user']['id'],
                    'user_login': i['user']['login'],
                    'user_type': i['user']['type'],
                    'state': i['state'],
                    'locked': i['locked'],
                    'comments': i['comments'],
                    'created_at': i['created_at'],
                    'updated_at': i['updated_at'],
                    'closed_at': i['closed_at'],
                    'body': i['body'],
                    'is_pull_request': 'pull_request' in i
                })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                              ISSUE_FIELD_NAMES,
                                              lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with next file.".format(e, ftp))
    print("DONE")
Example #33
    def test_get_terms(self):
        """
        Test the terms aggregation
        """

        field = self.field2
        # without field param
        with self.assertRaises(AttributeError):
            self.Query_test_object.get_terms()

        # with field param
        self.Query_test_object.get_terms(field)\
                              .since(start=self.start)\
                              .until(end=self.end)
        response = self.Query_test_object.fetch_aggregation_results()['aggregations']
        buckets = {"buckets": response['0']['buckets']}
        authors = load_json_file(TERMS_AGGREGATION_DATA)
        self.assertEqual(authors, buckets)
Example #34
    def set_file(self, filePath, fileName=None):
        """
        sets the file and tries to load it to use
        """
        self.__filePath = filePath

        if fileName:
            self.__fileName = fileName
            try:
                self.__dialogues = load_json_file(
                    os.path.join(self.__filePath, self.__fileName))
            except FileNotFoundError:
                save_json_file(obj=self.__dialogues,
                               path=os.path.join(self.__filePath,
                                                 self.__fileName))

        else:
            self.__fileName = DialogueAnnotator.__DEFAULT_FILENAME
Example #35
def build_argument_data_batch(file_name, FV, clf):
	gold_list = []
	matrix_list = []
	f_json = utils.load_json_file(file_name)

	for sentence in f_json['sentences']:
		event_candidates_list = sentence['eventCandidates']
		for event in event_candidates_list:
			argumentslist = event['arguments']
			for argument in argumentslist:
				arg_index = argument['begin']
				token_index = event['begin']
				matrix_list.append(FV.get_feature_matrix_argument_prediction(token_index, arg_index, sentence, clf))
				gold_list.append(argument['gold'])

	if len(matrix_list) == 0:
		return None, None

	if clf == 'perc':
		return matrix_list, gold_list
	elif clf == 'nb':
		return vstack(matrix_list), gold_list
Example #36
def build_trigger_data_batch(file_name, FV, clf):
	trigger_list = []
	token_index_list = []
	sentence_list = []
	f_json = utils.load_json_file(file_name)

	for sentence in f_json['sentences']:
		event_candidates_list = sentence['eventCandidates']
		for event in event_candidates_list:
			token_index_list.append(event['begin'])
			sentence_list.append(sentence)
			trigger_list.append(event['gold'])

	matrix_list = []
	for token_index, sentence in zip(token_index_list, sentence_list):
		matrix_list.append(FV.get_feature_matrix(token_index, sentence, clf))

	if len(matrix_list) == 0:
		return None, None

	if clf == 'perc':
		return matrix_list, trigger_list
	elif clf == 'nb':
		return vstack(matrix_list), trigger_list
Example #37
def serialize_and_save(entries, filename):
    json_data = serialize_items(entries)

    return save_json_file(
        data=json_data,
        filename=filename)


if __name__ == "__main__":

    # TODO: Accept sys args
    load = False

    created_file = None
    if load:
        data = load_json_file("json", "20140725_0940.json")
        entry_manager = EntryManager(data_dict=data)
    else:
        # start scraping the base data for all entries.
        entry_manager = scrape_base_info()

        created_file = serialize_and_save(
            entries=entry_manager.get_entries(),
            filename='json/%s.json' % datetime.now().strftime("%Y%m%d_%H%M")
        )

    # json_data = serialize_items(entry_manager.get_entries())
    # save_json_file(
    #     json_data,
    #     "json",
    #     '%s.json' % datetime.now().strftime("%Y%m%d_%H%M"))
Example #38
def main():

	################### EXPLORATORY DATA ANALYSIS #############################

	# Just testing my functions a bit
	list_of_files = utils.list_files()
	print (list_of_files[0])
	f1 = utils.load_json_file(list_of_files[0])
	pprint(len(f1['sentences']))
	    
	# Finding and counting all event triggers
	t = utils.get_all_triggers(list_of_files)
	print("Number of distinct event triggers: {0}".format(len(t.keys())))
	pprint(t)

	# Finding and counting all possible arguments (=relationship labels)
	arg = utils.get_all_arguments(list_of_files)
	print("Number of relation arguments: {0}".format(len(arg.keys())))
	pprint(arg)

	########################## NAIVE BAYES ####################################

	# Crossvalidation
	rates = [0.5,0.6,0.7,0.8,0.9,0.95]
	# x = crossvalidation_experiment(rates, list_of_files, load=True, mode='trig', k=3)
	# pprint(x)

	# x2 = crossvalidation_experiment(rates, list_of_files, load=True, mode='arg', k=3)
	# pprint(x2)

	## Naive Bayes on trigger
	# Read data
	print "Experiment 1: Naive Bayes predicting triggers"
	FV_trig = feature_vector.FeatureVector('trigger')
	train_list, valid_list = utils.create_training_and_validation_file_lists(list_of_files)

	X_train, y_train = build_dataset(train_list, FV_trig, ind=1, kind='train', mode='trig', clf='nb', load=True)
	X_train, y_train = subsample(X_train, y_train, clf='nb', subsampling_rate=0.50)
	X_valid, y_valid = build_dataset(valid_list, FV_trig, ind=1, kind='valid', mode='trig', clf='nb', load=True)

	NB_trig = nb.NaiveBayes()
	NB_trig.train(np.asarray(X_train.todense()),np.asarray(y_train))

	# print "Evaluate Naive Bayes classifer predicting triggers on the train set..."
	# CM, prec, rec, F1 = NB_trig.evaluate(np.asarray(X_train.todense()), np.asarray(y_train))
	# print "Precision: {0}".format(prec)
	# print "Recall: {0}".format(rec)
	# print "F1-measure: {0}".format(F1)
	# print "Confusion matrix:\n", np.int64(CM)

	print "Evaluate Naive Bayes classifer predicting triggers on the validation set..."
	CM, prec, rec, F1 = NB_trig.evaluate(np.asarray(X_valid.todense()), np.asarray(y_valid))
	print "Precision: {0}".format(prec)
	print "Recall: {0}".format(rec)
	print "F1-measure: {0}".format(F1)
	print "Confusion matrix:\n", np.int64(CM)

	## Naive Bayes on argument

	print "Experiment 2: Naive Bayes predicting arguments"
	FV_arg = feature_vector.FeatureVector('argument')

	X_train, y_train = build_dataset(train_list, FV_arg, ind=1, kind='train', mode='arg', clf='nb', load=True)
	X_train, y_train = subsample(X_train, y_train, clf='nb', subsampling_rate=0.50)
	X_valid, y_valid = build_dataset(valid_list, FV_arg, ind=1, kind='valid', mode='arg', clf='nb', load=True)

	NB_arg = nb.NaiveBayes()
	NB_arg.train(np.asarray(X_train.todense()), np.asarray(y_train))

	# print "Evaluate Naive Bayes classifer predicting arguments on the train set..."
	# CM, prec, rec, F1 = NB_arg.evaluate(np.asarray(X_train.todense()), np.asarray(y_train))
	# print "Precision: {0}".format(prec)
	# print "Recall: {0}".format(rec)
	# print "F1-measure: {0}".format(F1)
	# print "Confusion matrix:\n", np.int64(CM)

	print "Evaluate Naive Bayes classifer predicting arguments on the validation set..."
	CM, prec, rec, F1 = NB_arg.evaluate(np.asarray(X_valid.todense()), np.asarray(y_valid))
	print "Precision: {0}".format(prec)
	print "Recall: {0}".format(rec)
	print "F1-measure: {0}".format(F1)
	print "Confusion matrix:\n", np.int64(CM)
# Load weights of the pretrained perceptron.
with open('Perceptron_trigger.data', 'rb') as f:
    Lambda_e, misc_e = cPickle.load(f)
with open('Perceptron_argument.data', 'rb') as f:
    Lambda_a, misc_a = cPickle.load(f)
    
   
for i_f, test_file in enumerate(evaluate_test_list):
    print 'Test File', i_f, 'of', len(evaluate_test_list)
    
    #generate predictions for current file, p_e and p_a are the predicted values.
    (p_e, g_e) = perc.test_perceptron(FV_trig, Lambda_e, [test_file], mode='Trigger')
    (p_a, g_a) = perc.test_perceptron(FV_arg, Lambda_a, [test_file], mode='Argument')
                        
    f_fill_this = utils.load_json_file(test_file)    
    counter_e = 0
    counter_a = 0
    for sentence in f_fill_this['sentences']:
        event_candidates = sentence['eventCandidates']
        for ec in event_candidates:
            ec['predicted'] = FV_trig.trigger_list[p_e[counter_e]]
            counter_e += 1
            for arg in ec['arguments']:
                arg['predicted'] = FV_arg.arguments_list[p_a[counter_a]]
                counter_a += 1
                
    if counter_e != len(p_e):
        print 'PROBLEM: LENGTH OF PREDICTION VECTOR (trigger) DOES NOT FIT!'
    if counter_a != len(p_a):
        print 'PROBLEM: LENGTH OF PREDICTION VECTOR (argument) DOES NOT FIT!'
Example #39
    # (Snippet truncated in the source; the guard below is a plausible
    # reconstruction inferred from the append call that follows.)
    if str(boss_id) not in boss_list:
        boss_list[str(boss_id)] = {
            'name': boss_name,
            'loot_table': [],
        }

    boss_list[str(boss_id)]['loot_table'].append({
        'id': item_id,
        'name': item_name,
        'slot': slot.lower(),
        'level': item_level,
        'difficulties': difficulties,
        'specs': item_specs,
    })


# Save the raid information in a JSON file
json_raids = load_json_file(os.path.join(dest, 'raids.json'), [])

found = False
for raid_entry in json_raids:
    if raid_entry['name'] == raid_name:
        raid_entry['boss'] = boss_list
        raid_entry['wings'] = wings_list
        found = True
        break

if not found:
    json_raids.append({
        'name': raid_name,
        'wings': wings_list,
        'boss': boss_list,
    })
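For reference, the raids.json structure implied by this snippet is a list of raid entries, each holding its wings and a boss map keyed by boss id. A hypothetical entry (all values are placeholders, not real data):

# Hypothetical shape of raids.json implied by the code above:
# [
#     {
#         "name": "<raid_name>",
#         "wings": ["<wing>", ...],
#         "boss": {
#             "<boss_id>": {
#                 "name": "<boss_name>",
#                 "loot_table": [
#                     {"id": 0, "name": "<item_name>", "slot": "head",
#                      "level": 0, "difficulties": [], "specs": []}
#                 ]
#             }
#         }
#     }
# ]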