Example #1
    def __init__(self, root):
        import config
        import pandas as pd  # pandas is needed for the HDF stores and CSV below
        root, data_path, model_path, vector_path = config.get_paths()
        self.root = root
        self.data_path = data_path
        self.model_path = model_path
        self.vector_path = vector_path
        # Hashtags that define the subject sets used throughout the project.
        self.all_hashtags = [
            "voetbal", "moslim", "werk", "economie", "jihad", "seks",
            "politiek"
        ]
        self.all_vectors_store = pd.HDFStore(self.root + "w2v_vector.h5")
        self.balanced_store = pd.HDFStore(self.root +
                                          "datasets/seeds/balanced.h5")
        self.tweets = pd.read_csv(self.root + "datasets/data_sample.csv")
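In Examples #1 and #3, config.get_paths() is unpacked into four path strings. The project's real config module is not shown here; a minimal stand-in along these lines (all names and paths are placeholders, purely illustrative) is enough to run those snippets:

# Hypothetical stand-in for the project's config module (placeholder paths).
def get_paths():
    root = "/data/project/"
    data_path = root + "datasets/"
    model_path = root + "models/"
    vector_path = root + "vectors/"
    return root, data_path, model_path, vector_path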
Example #2
# `config`, whoosh's `index` module, the project modules `pt` and `sol`, and
# LOGGER are imported at module level (not shown in this snippet).
def partition_and_generate_distributions(index_name: str):
    configuration = config.get_paths()
    ix = index.open_dir(configuration[index_name], readonly=True)
    LOGGER.info('Index path: ' + configuration[index_name])
    with ix.reader() as ix_reader:
        pa = pt.Partitioner(ix, ix_reader)
        print('Partitioner initiated!')
        parts = pa.generate([0.98, 0.1])
        parts = [p for p in parts]
        print('Parts created!')
        print('naive1 ({}, {})'.format(parts[0].name, parts[1].name))
        sol.generate_distance_distributions(
            cache=parts[0],
            disk=parts[1],
            save_path='/data/khodadaa/index/data',
            distance_type=['kld', 'avg-kld'])
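Examples #2, #5 and #6 index the result of config.get_paths() by name (configuration[index_name], c['db_tfs']), so that variant of get_paths evidently returns a mapping from names to paths. A hypothetical sketch, assuming a plain dict with invented keys and paths:

# Hypothetical dict-returning variant of get_paths (keys and paths are illustrative only).
def get_paths():
    return {
        'wiki13_index': '/data/indexes/wiki13',
        'db_tfs': '/data/stats/db_term_frequencies.csv',
    }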
Example #3
# Test whether the number of tokens influences the NN performance.
# We saw that it does not work well when tweets with varying numbers of tokens are all used together.

import pandas as pd
import numpy as np
import os, sys
import features
import config
root, data_path, model_path, vector_path = config.get_paths()
print(root)

import dataset
dset = dataset.Dataset(root)
# dset.create_subject_sets()

## Train a neural network for each token count separately
vectors = dset.all_vectors_store["data"]
voetbal = pd.read_csv(root + "datasets/seeds/voetbal.csv")
voetbal = pd.merge(voetbal, vectors, on="id")
voetbal["labels"] = 0
jihad = pd.read_csv(root + "datasets/seeds/jihad.csv")
jihad = pd.merge(jihad, vectors, on="id")
jihad["labels"] = 1

# Count the tokens that remain after removing placeholder tokens and "rt".
rm_list = ["<stopword>", "<mention>", "<url>", "rt"]
voetbal["ntokens"] = voetbal.filtered_text.apply(
    lambda x: len([a for a in x.split() if a not in rm_list]))
jihad["ntokens"] = jihad.filtered_text.apply(
    lambda x: len([a for a in x.split() if a not in rm_list]))
Example #4
from time import sleep

from config import get_paths
from models import DDQNLearner, DDQNPlayer
from utils import make_atari, wrap_deepmind, parse_args
from utils import Logger, Plotter

args = parse_args()
# for arg in vars(args):
# 	print(arg, getattr(args, arg))

ENV_NAME = args.env_name
ENV_VER = args.env_version
ENV_GYM = ENV_NAME + ENV_VER

save_dirs = get_paths(drive=args.drive_save, env_name=ENV_NAME)

PRINT_FREQ_EP = args.log_freq
SAVE_MODEL_FREQ = args.save_freq
LEARNING_START = args.learn_start

logger = Logger(save_dirs=save_dirs,
                log_types=[],
                log_freq=args.log_freq,
                mode=args.mode)
plotter = Plotter(save_dirs=save_dirs,
                  plot_types=[
                      'avg_scores_ep', 'avg_scores_ts', 'avg_scores_100_ep',
                      'avg_scores_100_ts', 'scores_ep', 'scores_ts',
                      'high_scores_ep', 'high_scores_ts', 'low_scores_ep',
                      'low_scores_ts', 'avg_loss_ep', 'avg_acc_ep',
Example #5
    # Body of a search(...) helper: `ix`, `field_name` and `limit` are defined
    # earlier, outside this fragment.
    with ix.searcher() as searcher:
        query = QueryParser(field_name, ix.schema).parse(user_query)
        facet = sorting.FieldFacet('count', reverse=True)
        results = searcher.search(query, sortedby=facet, limit=limit)
        print(results)
        for res in results:
            print('\n', res)
            if res.reader.has_vector(res.docnum, field_name):
                vgen = res.reader.vector_as('frequency', res.docnum,
                                            field_name)
                terms = [v for v in vgen]
                terms.sort(key=lambda tup: tup[1], reverse=True)
                print('Top terms: ', terms)
            else:
                print('0 term')


if __name__ == '__main__':
    index_name = 'wiki13_index'
    limit = None
    if len(sys.argv) > 1:
        index_name = sys.argv[1]
    if len(sys.argv) > 2:
        limit = int(sys.argv[2])
    configuration = config.get_paths()

    user_query = 'public policy NOT "public policy"'
    while user_query != ':q':
        search(user_query, limit, configuration[index_name])
        user_query = input('Query [:q to exit] : ')
Example #6
# This snippet starts mid-function: the tail of a nested helper that estimates
# P(d|q), followed by the query-clarity sum over the vocabulary.
            prob_d_condit_q = prob_q_condit_d / norm
            prob += prob_t_condit_d * prob_d_condit_q
        return prob

    # Query clarity: KL divergence between the query language model P(t|Dq)
    # and the collection language model P(t|D), summed over the vocabulary.
    clt = 0.0
    for t in vocabulary:
        if collection_tfs[t] == 0:
            collection_tfs[t] = 1  # smooth zero counts so the division and log are defined
        prob_t_condit_D = collection_tfs[t] / collection_total_terms
        prob_t_condit_Dq = get_prob_t_condition_Dq(t)
        clt += prob_t_condit_Dq * log(prob_t_condit_Dq / prob_t_condit_D)
    return clt
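The function above appears to compute the query clarity score, a standard query-difficulty predictor: clarity(q) = sum over t of P(t|Dq) * log(P(t|Dq) / P(t|D)). A tiny self-contained check of that formula on invented counts (illustration only):

# Toy clarity computation; all numbers are made up for illustration.
from math import log

collection_counts = {'policy': 50, 'public': 40, 'football': 10}
collection_total = sum(collection_counts.values())
query_model = {'policy': 0.5, 'public': 0.4, 'football': 0.1}  # P(t|Dq)

clarity = sum(p * log(p / (collection_counts[t] / collection_total))
              for t, p in query_model.items())
print(clarity)  # 0.0: a query model identical to the collection model has zero clarity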


if __name__ == '__main__':
    c = config.get_paths()
    index_path = c[sys.argv[1]]
    query_file_path = sys.argv[2]
    save_path = sys.argv[3]

    config.setup_logger('querydifficulty')

    ix = index.open_dir(index_path, readonly=True)
    LOGGER.info('Index path: ' + index_path)
    ix_reader = ix.reader()

    vocabulary = []
    db_tfs = defaultdict(int)
    db_total_terms = 0
    with open(c['db_tfs'], 'r') as fr:
        for line in fr: