Example #1
def update(args):
    expression = "is_defined(core.archive_path)"
    if args.expression:
        expression += " and (%s)" % args.expression

    if args.action == 'pull':
        # only get products with a remote_url
        if expression:
            expression = "is_defined(remote_url) and (%s)" % expression
        else:
            expression = "is_defined(remote_url)"

    processor = UpdateProcessor(args)
    with muninn.open(args.archive) as archive:
        if args.action in ['ingest', 'pull']:
            # we only need the uuid and the product_name
            products = archive.search(expression, property_names=['uuid', 'product_name'])
        else:
            products = archive.search(expression, namespaces=archive.namespaces())
        if args.parallel:
            if args.processes is not None:
                pool = multiprocessing.Pool(args.processes)
            else:
                pool = multiprocessing.Pool()
            list(bar(pool.imap(processor, products), total=len(products)))
            pool.close()
            pool.join()
        else:
            for product in bar(products):
                processor.perform_operation(archive, product)

    return 0
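
The pattern above — wrapping Pool.imap in tqdm and draining the lazy iterator with list(...) — is easy to test in isolation. A minimal, self-contained sketch of just that pattern; the square worker is illustrative and not part of muninn:

import multiprocessing
from tqdm import tqdm as bar

def work(item):
    return item * item  # stand-in for processor(product)

if __name__ == "__main__":
    items = list(range(100))
    with multiprocessing.Pool() as pool:
        # list(...) forces the lazy imap iterator, so tqdm sees every result
        results = list(bar(pool.imap(work, items), total=len(items)))
    print(len(results))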
Example #2
def ingest(args):
    processor = IngestProcessor(args)
    with muninn.open(args.archive) as archive:
        if "-" in args.path:
            paths = [path for path in sys.stdin]
        else:
            paths = args.path
        total = len(paths)
        num_success = 0
        if args.parallel:
            if args.processes is not None:
                pool = multiprocessing.Pool(args.processes)
            else:
                pool = multiprocessing.Pool()
            num_success = sum(
                list(bar(pool.imap(processor, paths), total=total)))
            pool.close()
            pool.join()
        elif total > 1:
            for path in bar(paths):
                num_success += processor.perform_operation(archive, path)
        elif total == 1:
            # don't show progress bar if we ingest just one item
            num_success = processor.perform_operation(archive, paths[0])

    return 0 if num_success == total else 1
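
The "-" check above implements a common CLI convention: a lone dash among the path arguments switches to reading one path per line from stdin. A hedged standalone sketch of that convention (resolve_paths is an illustrative name, not a muninn helper):

import sys

def resolve_paths(cli_paths):
    if "-" in cli_paths:
        # e.g. `find /data -name '*.h5' | myprog -` ends up in this branch
        return [line.rstrip('\n') for line in sys.stdin]
    return cli_paths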
Example #3
def update(args):
    expression = "is_defined(core.archive_path)"
    if args.expression:
        expression += " and (%s)" % args.expression

    namespaces = []
    if args.namespaces:
        for namespace in args.namespaces:
            namespaces += namespace.split(' ')

    if args.action == 'pull':
        # only get products with a remote_url
        if expression:
            expression = "is_defined(remote_url) and (%s)" % expression
        else:
            expression = "is_defined(remote_url)"

    with muninn.open(args.archive) as archive:
        products = archive.search(expression, namespaces=namespaces)
        if args.parallel:
            pool = multiprocessing.Pool()
            list(bar(pool.imap(Processor(args), products),
                     total=len(products)))
            pool.close()
            pool.join()
        else:
            update_func = Processor(args, archive)
            for product in bar(products):
                update_func(product)

    return 0
Example #4
    def decrypt(self, cipher_data, priv_key, output):

        # The encrypted text embeds the original message length and block size,
        # which are needed for decoding
        self.buf = cipher_data.split('::')
        self.msg_len, self.block_size, self.cipher_blocks = int(
            self.buf[0]), int(self.buf[1]), self.buf[2].split(',')
        self.priv_key = self.split_key(priv_key)
        """
        Cipher block = C
        Plain text block = M
        Private key[0] = N
        Private key[1] = D
        Then:
        M = C^D mod N
        """
        self.integer_blocks = []
        pbar = bar(desc="[Info] Decrypting")
        for block in self.cipher_blocks:
            pbar.update(1)
            plain_block = pow(int(block), int(self.priv_key[2]),
                              int(self.priv_key[1]))
            self.integer_blocks.append(plain_block)
        pbar.close()
        # Call the disassemble method to turn the integer blocks back into plain text
        return super()._disassemble_blocks(self.msg_len, self.block_size,
                                           self.integer_blocks)
Example #5
    def encrypt(self, raw_data, pub_key, output, block_size):

        # Split public key into key parts
        self.block_size = block_size
        self.pub_key = self.split_key(pub_key)
        stdout.write("[Info] Formatting blocks...\n")
        if int(self.pub_key[0]) < 1024:
            stdout.write("[Error] Minimum keysize supported is 1024\n")
            exit()
        """ Encrypting blocks
        Cipher block = C
        Plain text block = M
        Public key[1] = N
        Public key[2] = E
        Then:
        C = M^E mod N
        """
        self.cipher_blocks = []
        pbar = bar(desc="[Info] Encrypting")
        for block in super()._assemble_raw_blocks(raw_data):
            pbar.update(1)
            self.cipher_blocks.append(
                pow(int(block), int(self.pub_key[2]), int(self.pub_key[1])))
        pbar.close()
        return self.cipher_blocks
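
The decrypt/encrypt pair in Examples #4 and #5 reduces to Python's three-argument pow. A toy round trip with textbook values (far below the 1024-bit minimum the code enforces; for illustration only):

N = 3233                  # n = 61 * 53
E, D = 17, 2753           # e * d ≡ 1 (mod φ(n)), with φ(n) = 3120
M = 65                    # one plaintext block as an integer
C = pow(M, E, N)          # encrypt: C = M^E mod N  ->  2790
assert pow(C, D, N) == M  # decrypt: M = C^D mod N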
Example #6
def genPrime(s):
    pbar = bar(desc="[Info] Generating prime pair")
    while True:
        pbar.update(1)
        # draw a random s-bit candidate in the range [2**(s - 1), 2**s)
        n = randrange(2**(s - 1), 2**(s))
        if isPrime(n):
            pbar.close()
            return n
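
genPrime relies on an isPrime helper that is not shown. A minimal probabilistic Miller-Rabin sketch that would fit here; the original's implementation may differ:

from random import randrange

def isPrime(n, rounds=40):
    if n < 2:
        return False
    for p in (2, 3, 5, 7, 11, 13):  # quick screen on small primes
        if n % p == 0:
            return n == p
    d, r = n - 1, 0
    while d % 2 == 0:               # write n - 1 as d * 2**r with d odd
        d //= 2
        r += 1
    for _ in range(rounds):
        x = pow(randrange(2, n - 1), d, n)
        if x in (1, n - 1):
            continue
        for _ in range(r - 1):
            x = pow(x, 2, n)
            if x == n - 1:
                break
        else:
            return False            # witness found: n is composite
    return True                     # very likely prime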
Example #7
    def process(self, archive, args, items):
        total = len(items)
        num_success = 0

        if args.parallel:
            num_success = sum(list(bar(_POOL.imap(self, items), total=total)))
            _POOL.close()
            _POOL.join()

        elif total > 1:
            for item in bar(items):
                num_success += self.perform_operation(archive, item)

        elif total == 1:
            # don't show progress bar if we ingest just one item
            num_success = self.perform_operation(archive, items[0])

        return 0 if num_success == total else 1
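
Handing self to _POOL.imap works because the processor is a picklable class whose instances are callable, so each worker simply invokes the instance on one item. A stripped-down sketch of that pattern (SquareProcessor is illustrative):

import multiprocessing
from tqdm import tqdm as bar

class SquareProcessor:
    def __call__(self, item):  # the instance itself is the worker function
        return item * item

if __name__ == "__main__":
    items = list(range(10))
    with multiprocessing.Pool(2) as pool:
        print(list(bar(pool.imap(SquareProcessor(), items), total=len(items))))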
Example #8
def run(env, agent, n_episodes=200, render=False, jupyter=False):
    episode_scores = []
    print(f"####### Training Environment {env}, episodes {n_episodes} #######")
    pbar = jbar(range(n_episodes)) if jupyter else bar(range(n_episodes))
    for episode in pbar:
        episode_score = run_episode(env, agent, render)
        episode_scores.append(episode_score)
        pbar.set_description(f"Episode: {episode + 1}, Score: {episode_score}")
    return episode_scores
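
bar and jbar here are presumably the console and notebook flavours of tqdm, imported along these lines:

from tqdm import tqdm as bar            # console progress bar
from tqdm.notebook import tqdm as jbar  # Jupyter widget progress bar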
Example #9
dataset = "DUC06"
# path = "/home/orkan/Dropbox/measurements/sherlock/naive adaptive k/" + dataset + "/"
path = "/home/orkan/Dropbox/measurements/sherlock/naive adaptive k 2/" + dataset + "/"

topics = get_sorted_topics(path)

labels = ["0", "0.05", "0.1", "0.15", "0.2", "0.25"]
# labels = ["0", "0.25", "0.5", "0.75", "1"]
data = {r: {'k': [], 't': [], 'k_norm': [], 'no_of_iterations': []} for r in labels}
atts = ['k', 't']

iterations = 10
##############################################################

for topic in bar(topics):
    reader = MeasurementReader()

    if topic.startswith("D0611B"):
        continue
    # k range per adaptive k
    # average k per adaptive k
    # time per iteration, on average

    read_logs(reader, path, topic)

    reader.read_corpora_stats(stat_folder)
    reader.set_topic_rid()
    corpus_size = reader.get_corpus_stat("Corpus Size after")
    for k in reader.run_log['k']:
        for att in atts:
Example #10
import pandas as pd
import pickle
from tqdm import tqdm as bar

df = pd.read_html('http://ytenx.org/kyonh/sieux?page='+str(1))[0]
df.columns = df.iloc[0]   # promote the first scraped row to the header
df = df.drop(index=0)     # then drop that header row from the data
df_list = [df]

for i in bar(range(2, 260)):
    data = pd.read_html('http://ytenx.org/kyonh/sieux?page='+str(i))[0]
    data.columns = data.iloc[0]
    data = data.drop(index=0)
    df_list.append(data)

df = pd.concat(df_list)
df = df.drop(columns='次序')   # drop the running-number column
df = df.T
df.columns = df.iloc[0]
df = df.drop(index='小韻')     # drop the label row left over from the transpose
pickle.dump(df, open('./pickles/rhyme_list.pkl', 'wb'))
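
The header juggling above (copy row 0 into the columns, then drop it) can presumably be avoided by letting read_html promote the first row itself:

import pandas as pd

# header=0 tells read_html to use the first parsed row as the column header
df = pd.read_html('http://ytenx.org/kyonh/sieux?page=1', header=0)[0]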
Example #11
def run_grid(f_lifes,
             f_falses,
             Ns,
             sample,
             model,
             sample_args=(),
             model_args=(),
             test=tests.MannWhitney,
             N_runs=500,
             log=True,
             filename=None,
             do_bar=True,
             N_proc=1):
    # Print the current model/sample/test
    try:
        print("Running:  MODEL: {:s}    SAMPLE: {:s}    TEST: {:s}".format(
            model_names[model.__name__], sample_names[sample.__name__],
            tests.test_names[test.__name__]))
    except KeyError:
        # the human-readable name tables may not cover every model/sample/test
        pass

    # Generate the p-value grid for the specific model, sample, and statistical test
    shape = (len(f_lifes), len(f_falses), len(Ns))
    p = np.zeros(shape, dtype=float)
    sig_p = np.zeros(shape, dtype=float)
    nans = np.zeros(shape, dtype=int)
    if do_bar:
        bar0 = bar
    else:

        def bar0(arg):
            return arg

    # Calculates and saves the typical p-value and std deviation for a single combination of f_life, f_false, N
    def run_bin(i, j, k, p, sig_p, nans):
        p0 = np.zeros(N_runs, dtype=float)
        for ii in range(N_runs):
            ages, O2 = draw_sample(f_lifes[i],
                                   f_falses[j],
                                   int(Ns[k]),
                                   sample,
                                   model,
                                   sample_args=sample_args,
                                   model_args=model_args)
            if O2.sum() > 0 and (~O2).sum() > 0:
                p0[ii] = test(ages, O2, p_only=True)
            else:
                p0[ii] = np.nan

        # Get the mean and std deviation for this bin and count the number of nans
        if (~np.isnan(p0)).sum() > 0:
            if log:
                p[i, j, k] = np.nanmean(np.log10(p0))
                sig_p[i, j, k] = np.nanstd(np.log10(p0))
            else:
                p[i, j, k], sig_p[i, j, k] = np.nanmean(p0), np.nanstd(p0)
        else:
            p[i, j, k], sig_p[i, j, k] = np.nan, np.nan

        nans[i, j, k] = np.isnan(p0).sum()

    # Get every combination of f_life,f_false,N
    combos = []
    for i in range(len(f_lifes)):
        for j in range(len(f_falses)):
            for k in range(len(Ns)):
                combos.append((i, j, k))

    # Run each combo sequentially
    if N_proc == 1:
        for combo in bar(combos):
            run_bin(*combo, p, sig_p, nans)

    # Save the results to a specified file
    if filename is not None:
        try:
            pkl = {}
            pkl['f_life'], pkl['f_false'] = np.array(f_lifes), np.array(f_falses)
            pkl['N'], pkl['nans'] = np.array(Ns), np.array(nans)
            if log:
                pkl['logp'], pkl['sig_logp'] = p, sig_p
            else:
                pkl['p'], pkl['sig_p'] = p, sig_p
            if not os.path.exists(ROOT_DIR + '/results'):
                os.mkdir(ROOT_DIR + '/results')
            pickle.dump(pkl, open(filename, 'wb'))
            print("Saved {:s}".format(filename))
        except OSError:
            print("Error saving file: {:s}".format(filename))

    return p, sig_p, nans
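
A tiny worked check of the log-space aggregation in run_bin: with p-values of 1e-2 and 1e-4 plus one NaN, the nan-aware mean and spread of log10 p come out as -3 and 1 (values illustrative):

import numpy as np

p0 = np.array([1e-2, 1e-4, np.nan])
print(np.nanmean(np.log10(p0)), np.nanstd(np.log10(p0)))  # -3.0 1.0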
Example #12
    label = 'E'
    pred_lab = VecContext.y2lab(pred)
    truth_lab = VecContext.y2lab(truth)
    P = metrics.flat_precision_score(truth_lab, pred_lab, pos_label=label)
    R = metrics.flat_recall_score(truth_lab, pred_lab, pos_label=label)
    f1 = metrics.flat_f1_score(truth_lab, pred_lab, pos_label=label)
    print(clf_name)
    print(
        metrics.flat_classification_report(truth_lab,
                                           pred_lab,
                                           labels=('I', 'E'),
                                           digits=4))
    csv_table.writerow([clf_name, P, R, f1])


for clf_name, clf in bar(clf_list):
    clf.fit(x_train, y_train)
    pred_vec = clf.predict(x_test)
    report(pred_vec, y_test, result_table, clf_name)

# lgb
x_train, x_valid, y_train, y_valid = train_test_split(x_train,
                                                      y_train,
                                                      test_size=0.1,
                                                      shuffle=True)

train_data = lgb.Dataset(x_train, y_train)
valid_data = lgb.Dataset(x_valid, y_valid, reference=train_data)

params = {
    'task': 'train',
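
The params dict is cut off above. A hedged sketch of how such a LightGBM run typically continues; the objective, metric, and round count are illustrative assumptions, not the author's settings:

params = {
    'task': 'train',
    'objective': 'binary',       # assumed; the original objective is not shown
    'metric': 'binary_logloss',  # assumed as well
}
booster = lgb.train(params, train_data, num_boost_round=100,
                    valid_sets=[valid_data])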
Example #13
        "ПЛОЩАДЬ ПРОЧИХ ХОЛОДНЫХ ПОМЕЩЕНИЙ", "ПЛОЩАДЬ НЕЖИЛАЯ",
        "ПЛОЩАДЬ ОБЩАЯ НЕЖИЛЫХ ПОДВАЛОВ", "ПЛОЩАДЬ ОБЩАЯ НЕЖИЛЫХ ЦОКОЛЬНЫХ",
        "ПЛОЩАДЬ ОБЩАЯ ЖИЛЫХ ПОМЕЩЕНИЙ", "ПЛОЩАДЬ ЖИЛАЯ ЖИЛЫХ ПОМЕЩЕНИЦ",
        "КОЛИЧЕСТВО ЖИЛЫХ ПОМЕЩЕНИЙ", "КОЛИЧЕСТВО ЖИЛЫХ КОМНАТ",
        "КОЛИЧЕСТВО ОДНОКОМНАТНЫХ КВАРТИО",
        "ОБЩАЯ ПЛОЩАДЬ ОДНОКОМНАТНЫХ КВАРТИР",
        "ЖИЛАЯ ПЛОЩАДЬ ОДНОКОМНАТНЫХ КВАРТИР",
        "КОЛИЧЕСТВО ДВУХКОМНАТНЫХ КВАРТИО",
        "ОБЩАЯ ПЛОЩАДЬ ДВУХКОМНАТНЫХ КВАРТИР",
        "ЖИЛАЯ ПЛОЩАДЬ ДВУХКОМНАТНЫХ КВАРТИР",
        "КОЛИЧЕСТВО ТРЕХКОМНАТНЫХ КВАРТИО",
        "ОБЩАЯ ПЛОЩАДЬ ТРЕХКОМНАТНЫХ КВАРТИР",
        "ЖИЛАЯ ПЛОЩАДЬ ТРЕХКОМНАТНЫХ КВАРТИР",
        "КОЛИЧЕСТВО ЧЕТЫРЕХКОМНАТНЫХ КВАРТИО",
        "ОБЩАЯ ПЛОЩАДЬ ЧЕТЫРЕХКОМНАТНЫХ КВАРТИР",
        "ЖИЛАЯ ПЛОЩАДЬ ЧЕТЫРЕХКОМНАТНЫХ КВАРТИР",
        "КОЛИЧЕСТВО ПЯТИКОМНАТНЫХ КВАРТИР",
        "ОБЩАЯ ПЛОЩАДЬ ПЯТИКОМНАТНЫХ КВАРТИР",
        "ЖИЛАЯ ПЛОЩАДЬ ПЯТИКОМНАТНЫХ КВАРТИР", "ВСЕГО КВАРТИР КОЛИЧЕСТВО",
        "ВСЕГО В КВАРТИРАХ КОМНАТ", "ВСЕГО В КВАРТИРАХ ПЛОЩАДЬ ОБЩАЯ",
        "ВСЕГО В КВАРТИРАХ ПЛОЩАДЬ ЖИЛАЯ", "СЕРИЯ ПРОЕКТА",
        "КОЛИЧЕСТВО БАССЕЙНОВ", "ПРИЗНАК СТУДЕНТЧЕСКОГО ОБЩЕЖИТИЯ",
        "СЧИТАТЬ ОТДЕЛЬНЫМ КОРПУСОМ", "НАЛИЧИЕ МАНСАРДЫ", "UNOM 2"
    ]

    for ind, val in bar(enumerate(true_ans)):
        if not pd.isna(val):
            excel_2.loc[ind, colums] = excel_1.iloc[int(val)][colums]

    excel_2.to_excel("5.xlsx")
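
One small tqdm note on this example: wrapping enumerate() hides the sequence length, so the bar can only show a raw count. Passing total restores the percentage display (the true_ans stand-in is illustrative):

from tqdm import tqdm as bar

true_ans = [2, None, 7]  # illustrative stand-in
for ind, val in bar(enumerate(true_ans), total=len(true_ans)):
    pass  # tqdm can now render n/total and a percentage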
Example #14
old_pop = init_fitness(
    old_pop,
    n_pop,
    bounds,
    diameter,
    height,
    z_0,
    windspeed_array,
    theta_array,
    wind_prob,
)

a = time()

try:
    for gen in bar(range(generations)):
        elite(new_pop, old_pop, elit_num)
        cross(new_pop, old_pop, n_pop, n_var, elit_num, cross_num, tour_size)
        mutate(new_pop, old_pop, n_pop, mutat_num, n_var, mutat_genes, b_range)
        new_pop = fitness(
            new_pop,
            n_pop,
            elit_num,
            bounds,
            diameter,
            height,
            z_0,
            windspeed_array,
            theta_array,
            wind_prob,
        )
except KeyboardInterrupt:
    pass  # some handler is assumed here; the original snippet is truncated at this point
Example #15
def Frequency_Analysis(username, prname):
    import pandas as pd
    from collections import Counter
    from tqdm import tqdm as bar
    from .utils import Read_Arg_, import_dataframe, export_dataframe
    input_directory = "/".join([username, prname])  # non-Changmin version

    if prname is not None:  # prname given -> called from another function: use the dictionary
        for_cooc = 0  # the case where a pure Frequency_Analysis should run
        ref, input_, output_ = Read_Arg_("Frequency_Analysis")
        Frequency_Gap = int(ref) / 100
        text = import_dataframe(input_)
    else:  # otherwise the text file to analyze is named by username
        for_cooc = 1
        ref, _, _ = Read_Arg_("Frequency_Analysis", isind=1)
        Frequency_Gap = int(ref) / 100
        text = import_dataframe(username)

    def get_contents(item):
        if item != "":
            # not_language = re.compile('[^ ㄱ-ㅎㅣ가-힣|a-z|A-Z]+')
            # item = re.sub(not_language,"",str(item))
            contents.append(str(item).lower().strip())

    contents = []
    tag_contents = []
    text.contents.apply(get_contents)

    for token in contents:  # TODO: this should be revised to tokenize on something other than spaces
        for word in str(token).split(" "):
            if len(str(word)) > 1:
                tag_contents.append(word)
    counted_contents = Counter(tag_contents)

    tag_count = []

    for n, c in counted_contents.most_common():
        dics = {"tag": n, "count": c}
        tag_count.append(dics)

    df_tag_count = pd.DataFrame(tag_count)
    df_tag_count = df_tag_count[df_tag_count["count"] >= 50].sort_values(
        by="tag").reset_index(drop=True)
    iterations = len(df_tag_count)
    row_num = 0

    total = bar(range(iterations - 1), desc="comparing...")
    for t in total:
        step = t + 1
        std_row = df_tag_count.iloc[row_num]
        comparison_row = df_tag_count.shift(-1).iloc[row_num]
        std_tag = str(std_row["tag"])
        std_count = std_row["count"]
        comparison_tag = str(comparison_row["tag"])
        comparison_count = comparison_row["count"]

        if std_tag == comparison_tag[:len(std_tag)]:
            frequency_gap = abs(std_count - comparison_count)
            if frequency_gap / std_count < Frequency_Gap:
                df_tag_count.iloc[row_num + 1,
                                  1] = comparison_count + std_count
                df_tag_count = df_tag_count[
                    df_tag_count["tag"] != std_tag].reset_index(drop=True)
            else:
                row_num = row_num + 1
                continue
        else:
            row_num = row_num + 1
            continue
        if step == iterations - 1:
            break

    df_tag_count = df_tag_count.sort_values(
        by="count", ascending=False).reset_index(drop=True)

    if for_cooc == 0:
        export_dataframe(df_tag_count, output_)
    else:
        pass

    return df_tag_count
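
The merge loop above folds a tag into its successor when the successor starts with it (std_tag == comparison_tag[:len(std_tag)] is just a prefix test) and their counts differ by less than Frequency_Gap. A toy check of that rule with illustrative numbers:

std_tag, std_count = "data", 100
comparison_tag, comparison_count = "datasets", 95
Frequency_Gap = 0.10

if comparison_tag.startswith(std_tag):  # equivalent to the slice comparison above
    if abs(std_count - comparison_count) / std_count < Frequency_Gap:
        # 5 / 100 = 0.05 < 0.10, so the counts are pooled under the longer tag
        print("merge:", std_tag, "->", comparison_tag, std_count + comparison_count)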