Code Example #1
    def katz_similarity(self, t):
        """Truncated Katz similarity of the node pair t = (i, j):
        sum over path lengths l = 1..maxl of beta**l * (#paths of length l)."""
        i, j = t[0], t[1]
        l = 1
        neighbors = self.Graph[i]  # row i of the sparse adjacency matrix
        score = 0.0
        while l <= self.maxl:
            numberOfPaths = neighbors[0, j]  # paths of length l from i to j
            if numberOfPaths > 0:
                score += (self.beta ** l) * numberOfPaths
            l += 1
            if l <= self.maxl:
                # advance path counts by one edge (row times adjacency matrix)
                neighborsForNextLoop = csr_matrix((1, self.G.number_of_nodes()), dtype=np.uint16)
                for k in neighbors.nonzero()[1]:
                    neighborsForNextLoop += neighbors[0, k] * self.Graph[k]
                neighbors = neighborsForNextLoop
        self.katz_scores.append((i, j, score))
        self.done.add((i, j))
        if i % 10 == 0 and j % 100 == 0:
            # periodically checkpoint the scores computed so far
            print(i, j)
            start_time = time.perf_counter()
            pickle_object(self.f_name + 'matrix.p', self.katz_scores)
            elapsed_time = time.perf_counter() - start_time
            print('left', len(self.edges) - len(self.katz_scores), 'edges')
            print('it took', elapsed_time, 'seconds')
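
For reference, the same truncated Katz score can be computed for all pairs at once from powers of the adjacency matrix: score(i, j) = sum over l = 1..maxl of beta**l * A**l[i, j]. A minimal, self-contained sketch (the toy graph and the beta/maxl values are illustrative, not from the project):

import numpy as np

# toy 4-node path graph: A[i, j] = 1 iff nodes i and j are adjacent
A = np.array([[0, 1, 0, 0],
              [1, 0, 1, 0],
              [0, 1, 0, 1],
              [0, 0, 1, 0]], dtype=float)

beta, maxl = 0.05, 3
scores = np.zeros_like(A)
power = np.eye(len(A))
for l in range(1, maxl + 1):
    power = power @ A              # A**l counts paths of length l
    scores += (beta ** l) * power

print(scores[0, 2])                # truncated Katz similarity of nodes 0 and 2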
Code Example #2
    def get_normalized_feature_dict(self):
        # cache pattern: return the pickled features if present,
        # otherwise compute, pickle, and return them
        try:
            return unpickle_object(self.file_name_pickle)
        except Exception:
            print("can't unpickle features, calculating")
            result = self.convert_features_to_dict(
                self.normalize_features(self.create_graph_features()))
            pickle_object(self.file_name_pickle, result)
            return result
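
Every example on this page relies on a pickle_object/unpickle_object helper pair, but the argument order differs by project: examples #1, #2, #5, #6, and #17 pass the path first, while examples #3, #7, #9, and the syyunn scripts pass the object first. A minimal sketch of the path-first variant (an assumption; each project ships its own helpers):

import pickle

def pickle_object(path, obj):
    # dump obj to path using the highest available pickle protocol
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def unpickle_object(path):
    # load and return the object stored at path
    with open(path, 'rb') as f:
        return pickle.load(f)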
Code Example #3
    def set_credentials(self, user, password, store=False):
        self.config["user"] = user
        self.config["passwd"] = password
        if store:
            # persist the credentials in encrypted form
            pickle_object(
                {
                    "user": encrypt_string(user),
                    "passwd": encrypt_string(password),
                },
                self.storage,
            )
Code Example #4
    def execute_jobs(self, append_query=None):
        jobs = self.get_open_jobs()
        if append_query is not None:
            jobs.append(append_query)
        remaining = []
        for job in jobs:
            try:
                self.execute_query(job)
            except Exception as e:
                # keep failed jobs so they are persisted and retried later
                print(e)
                remaining.append(job)
        pickle_object([encrypt_string(j) for j in remaining], self.open_jobs)
Code Example #5
    def read_release(self, version):
        print('reading release...')
        version = self.normalize_version(version)
        file_reader_pickle_path = self.get_file_reaeder_pickle_for_version(version)
        try:
            if self.force_read:
                # bypass the cached reader and re-read the raw release files
                raise ValueError('Forcing read')
            drug_reader = unpickle_object(file_reader_pickle_path)
        except Exception:
            print('failed to unpickle')
            release_path = self.get_relese_path(version)
            drug_reader = drug_data_reader(release_path)
            drug_reader.read_data_from_file()
            pickle_object(file_reader_pickle_path, drug_reader)
        return drug_reader
Code Example #6
    def preproc_release(self, drug_reader, version):
        print('preprocessing release...')
        version = self.normalize_version(version)
        preproc_pickle_path = self.get_preproc_pickle_for_version(version)
        try:
            if self.force_read:
                # bypass the cache and recompute
                raise ValueError('Forcing read')
            preproc = unpickle_object(preproc_pickle_path)
        except Exception:
            print('failed to unpickle')
            print('num all drugs in reader:', len(drug_reader.all_drugs))
            preproc = drugs_preproc(drug_reader.drug_to_interactions, drug_reader.all_drugs)
            preproc.calc_valid_drugs_print_summary()
            preproc.create_valid_drug_interactions()
            pickle_object(preproc_pickle_path, preproc)
        return preproc
Code Example #7
    def _param_run(
            self, param_set: ParamSet) -> Tuple[ExperimentResults, RunnerUUID]:
        log(f'Running param set: {param_set}')

        uuid = hash_dict(param_set)

        if self._experiment_result_exists(uuid):
            log('Loading experiment results from cache')
            log(uuid)
            experiment_results = unpickle(
                self._file_path_experiment_results(uuid))
        else:
            log(f'Running uuid {uuid}')
            experiment_results = train_kd(**param_set)
            pickle_object(experiment_results,
                          self._file_path_experiment_results(uuid))

        return experiment_results, uuid
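
Here hash_dict is assumed to turn a parameter dict into a deterministic cache key. One plausible sketch (illustrative only, not the project's actual implementation) hashes a JSON dump with sorted keys:

import hashlib
import json

def hash_dict(d):
    # identical param sets always map to the identical digest
    payload = json.dumps(d, sort_keys=True, default=str).encode('utf-8')
    return hashlib.md5(payload).hexdigest()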
Code Example #8
def _serialize(obj):
    ''' Serializes an arbitrary object for transfer

    Parameters
    ----------
    obj : `obj`
        object to be serialized for transfer

    Returns
    -------
    pickled_object : `lasso.ansa.rpc.PickledObject`
        protobuf serialized message

    Notes
    -----
        Converts any ansa entities to remote placeholders.
    '''

    # first convert ansa entities to fake entities
    if isinstance(obj, ansa.base.Entity):
        obj = _serialize_ansa_entity(obj)
    elif isinstance(obj, list):
        obj = [
            _serialize_ansa_entity(entry) if isinstance(
                entry, ansa.base.Entity) else entry for entry in obj
        ]
    elif isinstance(obj, tuple):
        obj = tuple(
            _serialize_ansa_entity(entry) if isinstance(
                entry, ansa.base.Entity) else entry for entry in obj)
    elif isinstance(obj, dict):
        obj = {
            _serialize_ansa_entity(key) if isinstance(key, Entity) else key:
            _serialize_ansa_entity(value)
            if isinstance(value, Entity) else value
            for key, value in obj.items()
        }

    # then we pickle everything
    return AnsaGRPC_pb2.PickledObject(data=pickle_object(obj))
Code Example #9
File: build_tokenizer.py Project: kmanchel/BiasNet
import argparse

import numpy as np
import tensorflow as tf
from tqdm import tqdm

# Params, Corpus, and pickle_object come from the project's own modules

parser = argparse.ArgumentParser()
parser.add_argument("--save_batch_every", type=int, default=1)

args = parser.parse_args()

if __name__ == "__main__":

    params = Params('params.json')
    data = Corpus(params)
    iterator = iter(data.dataset)

    tokenizer = tf.keras.preprocessing.text.Tokenizer(
        filters='"()*,-/;[\]^_`{|}~', oov_token='UNK', char_level=False)
    necessary_its = (48000 // params.batch_size)
    print("Batch iterations: %d" % necessary_its)
    i = 0
    for batch, _ in tqdm(iterator):
        if i >= necessary_its:
            break
        batch = np.char.decode(batch.numpy().astype(np.bytes_), 'UTF-8')
        tokenizer.fit_on_texts(batch)
        if i % args.save_batch_every == 0:
            # checkpoint the tokenizer every save_batch_every batches
            pickle_object(tokenizer, params.tokenizer_file)
        i += 1
        if i == necessary_its // 2:
            print("HALFWAY DONE")
    print("DONE BUILDING TOKENIZER")
    print("Tokenizer saved as %s" % params.tokenizer_file)
Code Example #10
File: gold_daily.py Project: syyunn/gold
import utils

fname = "../raw/gold_daily.csv"

dates = []
golds = []

with open(fname, 'r') as f:
    for line in f:
        content = line.split(',')
        idx = content[0]
        if len(idx) == 0:  # skip rows without an index
            continue
        date = content[1]
        gold = float(content[2])

        dates.append(date)
        golds.append(gold)

df = utils.make_as_pandas_df(dates, content_name='Gold', content_list=golds)

utils.standard_plot(df)

utils.pickle_object(df, "../data/gold_daily.pkl")

if __name__ == "__main__":
    pass
Code Example #11
File: analyze_volatility.py Project: syyunn/gold
    sign = np.sign(curr_vol)

    if sign == 1:
        count_plus += 1
    elif sign == -1:
        count_minus += 1
    elif sign == 0:
        count_zero += 1

    dates.append(curr_date)
    signs.append(sign)

df_sign = utils.make_as_pandas_df(dates_list=dates,
                                  content_list=signs,
                                  content_name="sign_of_volatility")

utils.pickle_object(df_sign, "volatility_data/sign_daily_gold.pkl")

utils.standard_plot(df_sign[:100],
                    column_name="sign_of_volatility",
                    scatter=True)

print("count_plus: {}, count_minus: {}, conut_zero: {}".format(
    count_plus, count_minus, count_zero))

if __name__ == "__main__":
    pass
Code Example #12
                    "{}-{}-{}?" \
                    "access_key={}&" \
                    "symbols={}&" \
                    "format=1".\
                format(year, month, day, access_key, currencies)

            request = requests.get(query).json()
            count += 1

            if 'rates' not in request:
                print("NULL")
                continue

            usd = request['rates']['USD']
            krw = request['rates']['KRW']

            usd_over_krw = usd / krw
            krw_over_usd = krw / usd

            item = dict()
            item[date] = krw_over_usd
            rates.append(item)

            print(date, " : ", krw_over_usd)

            if date == "1997-12-31":
                pickle_object(rates, 'KRW_USD_1997.pkl')

if __name__ == "__main__":
    pass
Code Example #13
import numpy as np
import pandas as pd

import utils

filename = "monthly"
f = open("raw/{}.csv".format(filename), "r")
lines = f.readlines()

dates = []
prices = []
for line in lines:
    date, price = line.split(',')
    dates.append(date)
    prices.append(price)

dates = np.array(dates, dtype='datetime64[M]')
prices = np.array(prices, dtype='float64')

data = {'Date': dates, 'Gold': prices}

df = pd.DataFrame(data)

utils.pickle_object(df, "data/Gold_{}.pkl".format(filename))

data_path = "data/Gold_{}.pkl".format(filename)
df = utils.load_pickle(data_path)
utils.standard_plot(df)

if __name__ == "__main__":
    pass
Code Example #14
File: cpi_monthly.py Project: syyunn/gold
filename = "cpi_monthly"
f = open("../raw/{}.txt".format(filename), "r")
lines = f.readlines()

dates = []
cpis = []

for line in lines:
    split = line.split('\t')
    try:
        year = int(split[0])
    except ValueError: # get rid of str column data
        continue
    print(split)

    for month, cpi in enumerate(split[1:13]):
        date = datetime.datetime(year, month+1, 1, 0, 0)
        dates.append(date)
        cpis.append(float(cpi))

df = utils.make_as_pandas_df(dates_list=dates,
                             content_list=cpis,
                             content_name='CPI')

utils.standard_plot(df, column_name='CPI')
utils.pickle_object(df, "../data/cpi_monthly.pkl")

if __name__ == "__main__":
    pass
Code Example #15
        word = values[0]
        coefs = np.asarray(values[1:], dtype="float32")
        embeddings_index[word] = coefs
    f.close()
    print("found %s word vectors" % len(embeddings_index))

    # embedding matrix
    print("preparing embedding matrix...")
    words_not_found = []

    embedding_matrix = np.zeros((len(word_index) + 1, 300))

    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None and len(embedding_vector) > 0:
            embedding_matrix[i] = embedding_vector
        else:
            words_not_found.append(word)

    return embedding_matrix


if __name__ == "__main__":

    params = Params("params.json")
    tokenizer = load_pickle(params.tokenizer_file)
    embeddings_matrix = load_subword_embedding(tokenizer.word_index, args.emb_path)
    pickle_object(embeddings_matrix, params.embedding_matrix)
    print("Created and Saved Embedding Matrix @ %s" % params.embedding_matrix)
Code Example #16
File: silver_daily.py Project: syyunn/silver
dates = []
silvers = []

# f is the raw silver CSV file handle opened earlier (elided from this snippet)
for line in f.readlines():
    content = line.split(',')
    idx = content[0]
    if len(idx) == 0:  # skip rows without an index
        continue
    date = content[1]
    silver = content[2].strip()
    if len(silver) == 0:  # skip rows with a missing price
        continue
    silver = float(silver)

    dates.append(date)
    silvers.append(silver)

df = utils.make_as_pandas_df(dates,
                             content_name='Silver',
                             content_list=silvers)

utils.standard_plot(df, column_name='Silver')

utils.pickle_object(df, "../data/silver_daily.pkl")

if __name__ == "__main__":
    pass
Code Example #17
train_ratio = 0.7
validation_ratio = 0.0
test_ratio = 0.3

# Holdout:
evaluation_method = ['Retrospective', 'Holdout'][1]
new_version = "5.1.1"
old_version = "5.0.0"

# split into train/test
if evaluation_method == 'Retrospective':
    m_test, m_train, evaluator, test_tuples, i2d, evaluation_type, drug_id_to_name = \
        create_train_test_split_relese(old_relese=old_version, new_relese=new_version)
else:
    m_test, m_train, evaluator, test_tuples, i2d, evaluation_type, drug_id_to_name = \
        create_train_test_split_ratio(new_version, train_ratio, validation_ratio, test_ratio)

G = nx.from_numpy_matrix(m_train)
edges = sorted([(i, j) for i in G.nodes() for j in G.nodes() if j >= i])
def split_list(alist, wanted_parts=1):
    # slice alist into wanted_parts nearly equal consecutive chunks
    length = len(alist)
    return [alist[i * length // wanted_parts: (i + 1) * length // wanted_parts]
            for i in range(wanted_parts)]

edges_parts = split_list(edges, wanted_parts=10)
part = 2  # done: 0, 1, 3, 4, 5; working: here: 5, VPN: .
katz_calc = k()
katz_calc.G = G
katz_calc.edges = set(edges_parts[part])
katz_scores = katz_calc.katz()

pickle_object(f'kats_scores_final_{part}.pickle', katz_scores)
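
For reference, split_list cuts a list into wanted_parts nearly equal consecutive slices, with the last slice absorbing any remainder:

>>> split_list(list(range(10)), wanted_parts=3)
[[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]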
Code Example #18
fname = "data/gold_daily.pkl"
df = utils.load_pickle(fname)
utils.standard_plot(df, column_name='Gold')

num_data = df.shape[0]

volatiles = []
dates = []

for idx in range(num_data-1):
    curr_day = df.loc[idx]["Date"]
    curr_gold = df.loc[idx]["Gold"]
    tmr_gold = df.loc[idx+1]["Gold"]
    target_volatility = (tmr_gold - curr_gold) / curr_gold
    target_volatility_percentage = target_volatility * 100
    print("idx: {}, vol: {}".format(curr_day, target_volatility_percentage))

    volatiles.append(target_volatility_percentage)
    dates.append(curr_day)

df_volatile = utils.make_as_pandas_df(dates, content_list=volatiles,
                                      content_name="Volatility")

utils.standard_plot(df_volatile, column_name="Volatility")

utils.pickle_object(df_volatile, "gold_daily.pkl")


if __name__ == "__main__":
    pass
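
The loop above computes the one-day-ahead percentage change, aligned to the current day. Assuming df is an ordinary pandas DataFrame, the same series can be produced vectorized (an equivalent sketch, not the project's code):

# (tomorrow - today) / today * 100, indexed at today
df["Volatility"] = df["Gold"].pct_change().shift(-1) * 100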
Code Example #19
gold_normals = []
gold_normals_date = []

for gold_idx, date in enumerate(normal_target_dates):
    gold = df_gold['Gold'][gold_idx]

    cpi_date = datetime.datetime(date.year, date.month, 1, 0, 0)
    cpi_idx = df_cpi['Date'][df_cpi['Date'] == cpi_date].index.tolist()

    if len(cpi_idx) == 0:  # no CPI entry for this month
        continue
    cpi_idx = cpi_idx[0]
    cpi = df_cpi['CPI'][cpi_idx]
    print("cpi: {}, gold: {}".format(cpi, gold))

    normalizer = 100 / cpi
    gold_normal = gold * normalizer
    gold_normals.append(gold_normal)
    gold_normals_date.append(date)

data_cpi_normalized = {'Date': gold_normals_date, 'Gold_Normal': gold_normals}

df_gold_normal = pd.DataFrame(data_cpi_normalized)

utils.standard_plot(df_gold_normal, column_name="Gold_Normal")
utils.pickle_object(df_gold_normal,
                    "../data/gold_daily_normal_cpi_monthly.pkl")
if __name__ == "__main__":
    pass
Code Example #20
File: FXTOP.py Project: forlink/forex-2
    # date: "YYYY/MM/DD" -> "YYYY-MM-DD"
    line = line.split(' ')[1]
    date = line.split('\t')[0].replace('/', '-')
    dates.append(date)

    # rate: strip the trailing newline before parsing
    rate = float(line.split('\t')[1].strip())
    rates.append(rate)

dates = np.array(dates, dtype='datetime64[D]')[::-1]
rates = np.array(rates, dtype='float32')[::-1]

data = {'Date': dates,
        'KRW/USD': rates}

df = pd.DataFrame(data)
utils.pickle_object(df, "data/KRW_USD_{}.pkl".format(period))

# df = utils.load_pickle("data/KRW_USD_1997.pkl")
utils.standard_plot(df)

if __name__ == "__main__":
    pass