Example #1
def _process_item(self, download_url, result_url, **kw):
    token = kw.get('security_token', '')
    tmp = TemporaryDirectory(prefix='msds-', dir=WORKDIR)
    outdir = os.path.join(tmp.name, 'out')
    json_file = os.path.join(outdir, 'all.json')
    result_file = os.path.join(outdir, 'single_chem.json')
    r = requests.get(download_url)
    if r.status_code != 200:
        return
    with open(os.path.join(tmp.name, 'sdb.pdf'), 'wb') as fp:
        fp.write(r.content)
    sdbparser.batch_call(outdir, [tmp.name], True, UBA_FILE)
    if not os.path.isfile(json_file):
        return
    with open(json_file, encoding='utf-8') as fp:
        data = json.load(fp)
    try:
        prepare.prepare_data(data[0], outdir)
    except Exception:
        # prepare_data failures are tolerated; the result-file check below
        # decides whether anything gets posted back
        pass
    if os.path.isfile(result_file):
        with open(result_file, encoding='utf-8') as fp:
            result = json.load(fp)
        result['security_token'] = token
        requests.post(result_url, json=result)
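A side note on the snippet above: the TemporaryDirectory is created but never explicitly cleaned up, so the directory lives until the object is garbage-collected. Below is a minimal sketch of the context-manager form of tempfile.TemporaryDirectory (standard-library behaviour, not code from this example's repository), which guarantees removal on exit:

import os
from tempfile import TemporaryDirectory

# Sketch only: the context manager removes the directory and its contents
# when the block exits, even on exceptions.
with TemporaryDirectory(prefix='msds-') as tmpdir:
    pdf_path = os.path.join(tmpdir, 'sdb.pdf')
    with open(pdf_path, 'wb') as fp:
        fp.write(b'%PDF-1.4 placeholder')   # placeholder bytes, not a real PDF
    print(os.path.exists(pdf_path))         # True inside the block
print(os.path.exists(pdf_path))             # False: the directory is gone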
Example #2
def main():
    """Get shortest path for given parameters."""

    parser = argparse.ArgumentParser(description='Shortest Route')
    parser.add_argument('-g',
                        '--graph',
                        help='input graph file',
                        type=str,
                        dest="graph",
                        required=True)
    parser.add_argument('-s',
                        '--start',
                        help='start node',
                        type=str,
                        dest="start",
                        required=True)
    parser.add_argument('-f',
                        '--finish',
                        help='finish node',
                        type=str,
                        dest="finish",
                        required=True)
    args = parser.parse_args()

    node_dictionary = prepare_data(args.graph)
    route = prepare_routes(node_dictionary)
    route.find_shortest_route(args.start, args.finish)
    print(route.get_route_distance(route.shortest_route))
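The prepare_data/prepare_routes helpers and the route object are project code that is not shown in this excerpt. For orientation only, here is a generic, self-contained Dijkstra sketch over an adjacency dictionary; the graph representation ({node: {neighbour: weight}}) is an assumption, not the project's actual format:

import heapq

def dijkstra(graph, start, finish):
    # graph: {node: {neighbour: edge_weight, ...}}
    dist = {start: 0}
    prev = {}
    queue = [(0, start)]
    while queue:
        d, node = heapq.heappop(queue)
        if node == finish:
            break
        if d > dist.get(node, float('inf')):
            continue  # stale queue entry
        for neighbour, weight in graph.get(node, {}).items():
            nd = d + weight
            if nd < dist.get(neighbour, float('inf')):
                dist[neighbour] = nd
                prev[neighbour] = node
                heapq.heappush(queue, (nd, neighbour))
    # walk predecessors back from finish to recover the path
    path, node = [], finish
    while node in prev or node == start:
        path.append(node)
        if node == start:
            break
        node = prev[node]
    return list(reversed(path)), dist.get(finish, float('inf'))

# dijkstra({'a': {'b': 1}, 'b': {'c': 2}, 'c': {}}, 'a', 'c') -> (['a', 'b', 'c'], 3)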
Example #3
def predict(predict_conf):
    # load data
    _, data = load_pkl_data(predict_conf.path_data)

    # load model meta data
    meta = load_pkl_data(predict_conf.path_meta)
    meta_image_shape = meta['ModelConf'].img_shape
    meta_re_sample_type = meta['ModelConf'].img_re_sample
    meta_text_len = meta['ModelConf'].text_length
    meta_label_num = len(meta['label2id'])
    meta_id2label = {v: k for k, v in meta['label2id'].items()}

    # load model
    model = keras.models.load_model(predict_conf.path_model, custom_objects={
        "CoAttentionParallel": CoAttentionParallel
    })

    # prepare data
    _, _, data_test = prepare_data(data, meta_image_shape, meta_re_sample_type,
                                   meta_text_len, meta_label_num, 0, 0)

    # predict with trained model
    x_test, y_test = data_test
    y_predict = model.predict(x_test)
    y_true = y_test.tolist()

    # save predictions
    save_pkl_data(predict_conf.path_predictions, [y_predict, y_test])

    # print metric results
    scores = evaluate(y_true, y_predict, predict_conf.threshold)
    label_names = [meta_id2label[i] for i in range(len(meta_id2label))]
    display_scores(scores, label_names)
Example #4
def split_data_with_conf(data, label_size, train_conf, model_conf):
    train_ratio, valid_ratio, test_ratio = normalize_data_ratio(
        train_conf.train_ratio, train_conf.valid_ratio, train_conf.test_ratio)
    data_train, data_valid, data_test = prepare.prepare_data(
        data, model_conf.img_shape, model_conf.img_re_sample,
        model_conf.text_length, label_size, train_ratio, valid_ratio)

    return data_train, data_valid, data_test
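normalize_data_ratio is not shown in this listing; from the call site it presumably rescales the three ratios so they sum to 1. A hypothetical sketch matching that reading (an assumption, not the project's implementation):

def normalize_data_ratio(train_ratio, valid_ratio, test_ratio):
    # rescale the three ratios so that they sum to 1
    total = train_ratio + valid_ratio + test_ratio
    if total <= 0:
        raise ValueError('at least one ratio must be positive')
    return train_ratio / total, valid_ratio / total, test_ratio / total

# normalize_data_ratio(8, 1, 1) -> (0.8, 0.1, 0.1)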
Example #5
def calculate_idf_score(df):
    # NOTE: the incoming `df` is ignored; the data is re-read and re-prepared here
    df = pd.read_json("data.json")
    df = prepare.prep_readme_data(df)
    df = prepare.prepare_data(df)

    languages = df.is_top_language.unique()
    idf_scores = pd.DataFrame()
    for language in languages:
        words = clean(' '.join(df[df.is_top_language == language].clean_lemmatized))
        idf_df = return_words_with_idf(words)
        idf_df["language"] = language

        idf_scores = pd.concat([idf_scores, idf_df])
    return idf_scores
Example #6
def return_words_with_idf(words):
    df = pd.read_json("data.json")
    df = prepare.prep_readme_data(df)
    df = prepare.prepare_data(df)

    def idf(word):
        # document frequency is smoothed with +1 to avoid division by zero
        return df.shape[0] / (1 + (df.clean_lemmatized.str.contains(word)).sum())

    # put the unique words into a data frame, calculate the idf for each word,
    # sort the data for presentation purposes, and keep the top 5
    idf_df = (pd.DataFrame(dict(word=words))
              .assign(idf=lambda df: df.word.apply(idf))
              .set_index('word')
              .sort_values(by='idf', ascending=False)
              .head(5))

    return idf_df
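The idf helper above computes idf(word) = N / (1 + document frequency), where N = df.shape[0] is the number of documents and the +1 guards against division by zero for words that never appear. A toy illustration on a made-up three-document corpus (not data from the repository):

import pandas as pd

docs = pd.Series([
    'python pandas data',
    'python graphs',
    'rust memory safety',
])
n_docs = docs.shape[0]                          # N = 3
for word in ['python', 'rust', 'haskell']:
    doc_freq = docs.str.contains(word).sum()    # documents containing the word
    print(word, n_docs / (1 + doc_freq))
# python -> 3 / (1 + 2) = 1.0, rust -> 3 / 2 = 1.5, haskell -> 3 / 1 = 3.0
# rarer words get larger idf values, hence the descending sort above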
Example #7
def main():

    subgraphs, ids_by_length, ids_by_number_of_matched_files, lengths, jsons = prepare_data(
        DATE)
    subgraphs = sort_subgraphs(subgraphs, lengths, ids_by_length)

    #file_length('116746_gd1990-03-14s1t02.flac', lengths)

    #json.dump(jsons, open('jsons.json', 'w'))
    #json.dump(lengths, open('lengths.json', 'w'))
    #json.dump(subgraphs, open('subgraphs.json', 'w'))
    #sys.exit()
    all_partitions = []
    partition_jkeys = []

    for sub in subgraphs:
        chains = []  # json keys of chained alignments

        sub_partitions = []

        for s in list(sub.values())[0]:
            if len(s) > 1:
                jkey = track_tuple_to_json_id((s[0], s[1]))
                chains.append(s + list(sub.keys()))
            else:
                jkey = track_tuple_to_json_id((s[0], list(sub.keys())[0]))
            dtw = jsons[jkey]['dtw']
            # swap columns to match the order of file names/lengths
            dtw = [[x[1], x[0]] for x in dtw]
            tuning_diff = jsons[jkey]['tuning_diff']
            partitions = get_partition_bounds(dtw, jkey)

            partitions = fill_gaps(jkey, partitions, lengths, tuning_diff)

            all_partitions.append(partitions)

            partition_jkeys.append(jkey)

            target_folder = os.path.join('plots', DATE)
            os.makedirs(target_folder, exist_ok=True)

            fname = f'{target_folder}/{jkey}'
            #print(fname)
            #json.dump(sorted(partitions, key=lambda x: x[0][0]), open(fname+'.json', 'w'))
            #sys.exit()
            #plotFigure(partitions, jkeys[0], lengths, fname, dtw, jsons)

            #break

        for c in chains:
            all_partitions, partition_jkeys = process_chain(
                c, all_partitions, partition_jkeys, jsons, lengths)
            #break
        #json.dump(all_partitions, open('all_partition.json', 'w'))
        #break

    all_partitions, partition_jkeys = cleanResult(subgraphs, all_partitions,
                                                  partition_jkeys)

    result = {}
    for key, value in zip(partition_jkeys, all_partitions):
        result[key] = value

    result['unmatched'] = jsons['unmatched']

    with open('all_partition.json', 'w') as fp:
        json.dump(result, fp)

Example #8
    parser.add_argument("--config_deepsort", type=str, default="./configs/deep_sort.yaml")
    # parser.add_argument("--ignore_display", dest="display", action="store_false", default=True)
    parser.add_argument("--display", action="store_true", default=False)
    parser.add_argument("--frame_interval", type=int, default=1)
    parser.add_argument("--display_width", type=int, default=800)
    parser.add_argument("--display_height", type=int, default=600)
    parser.add_argument("--save_path", type=str, default="./output/")
    parser.add_argument("--cpu", dest="use_cuda", action="store_false", default=True)
    parser.add_argument("--camera", action="store", dest="cam", type=int, default="-1")
    return parser.parse_args()
    
if __name__ == "__main__":
    print('start mot')
    
    start_time = time.time()
    args = parse_args()

    prepare_data(args.data_path)
    tracks = os.listdir(r'./dataset/test-c')
    for track in tracks:
        track_data_folder = os.path.join(r'./dataset/test-c', track, 'img1')
        im = cv2.imread(os.path.join(track_data_folder, '00000.jpg'))
        i_h, i_w,_ = im.shape
        os.system('cgexec -g memory:myGroup python vis_zhongxing.py --data_path ' + track_data_folder + ' --track_name ' + track)
        post_process(args.result_path, './output/'+track+'.txt')
Example #9
def pred_death_P():
    predict = pre.predict_death_P()
    res = predict.astype(str).to_json(orient='records')
    return res


@app.route("/pred_death_A")
# Death count prediction - Prophet model
def pred_death_A():
    res = pre.predict_death_A()
    res = res.to_json(orient='records')
    return res


if __name__ == "__main__":
    pre.prepare_data()
    cursor = hive.connect(host='localhost').cursor()
    os.system("docker cp res.csv server_hive-server_1:/opt/hive/bin/res.csv")
    # Drop the old table
    cursor.execute("""DROP TABLE IF EXISTS covid""")
    # Create the table if needed and skip the first (header) line of the CSV file
    cursor.execute(
        """CREATE TABLE IF NOT EXISTS covid(country STRING,prov STRING,confirm INT, recov INT, death INT,jour STRING) 
        ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ';'
        tblproperties('skip.header.line.count'='1')""")
    cursor.execute(
        "LOAD DATA LOCAL INPATH '/opt/hive/bin/res.csv' OVERWRITE INTO TABLE covid"
    )
    app.run()
Example #10
def split_scale_data():
    df = prepare_data()
    train, validate, test = split_data(df)
    # Select the float-typed features to be scaled
    X = train.select_dtypes(include=['float']).columns
    return scale_data(train, validate, test, X)
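split_data and scale_data are not shown in this excerpt. Below is a hypothetical sketch of helpers that would match the call sites above, assuming a random split and sklearn's MinMaxScaler; treat the names and behaviour as assumptions rather than the repository's actual code:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

def split_data(df, train_size=0.6, validate_size=0.2, random_state=123):
    # carve off the training set first, then split the remainder into validate/test
    train, rest = train_test_split(df, train_size=train_size, random_state=random_state)
    validate, test = train_test_split(
        rest, train_size=validate_size / (1 - train_size), random_state=random_state)
    return train, validate, test

def scale_data(train, validate, test, columns):
    # fit the scaler on the training split only, then apply it to all three splits
    scaler = MinMaxScaler().fit(train[columns])
    scaled = []
    for part in (train, validate, test):
        part = part.copy()
        part[columns] = scaler.transform(part[columns])
        scaled.append(part)
    return tuple(scaled)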
Example #11
def train(model_conf, train_conf):
    # set up random seed
    random.seed(train_conf.random_seed)

    # check the output path
    if not os.path.exists(train_conf.path_output):
        os.makedirs(train_conf.path_output)

    # load and statistics
    (vocab2id, label2id), data = load_pkl_data(train_conf.path_data)
    id2vocab = {v: k for k, v in vocab2id.items()}
    id2label = {v: k for k, v in label2id.items()}
    token_size, label_size = len(id2vocab), len(id2label)
    label_names = [id2label[i] for i in range(len(id2label))]
    print('label size:', label_size, 'token size:', token_size)
    print('label names:', label_names)

    # split data
    train_ratio, valid_ratio, test_ratio = normalize_data_ratio(
        train_conf.train_ratio, train_conf.valid_ratio, train_conf.test_ratio)
    data_train, data_valid, data_test = prepare.prepare_data(
        data, model_conf.img_shape, model_conf.img_re_sample,
        model_conf.text_length, label_size, train_ratio, valid_ratio)
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = \
        data_train, data_valid, data_test
    print('train: {0}; valid: {1}; test: {2}'.format(len(y_train),
                                                     len(y_valid),
                                                     len(y_test)))

    # train and test
    scores = []
    predict_threshold = 0.5
    for i in range(train_conf.repeat_times):
        print('{sp}\ntime {i}\n{sp}'.format(sp='=' * 20, i=i))
        # prefix to save the training process
        path_prefix = os.path.join(
            train_conf.path_output,
            'model_{}_{}'.format(train_conf.code_name, i))

        # create and train the model
        model = create_model_with_conf(token_size, label_size, model_conf)
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        # init callbacks
        path_cp = path_prefix + '.cp'
        es = EarlyStopping(monitor=train_conf.monitor_type,
                           patience=train_conf.early_stop_patients)
        cp = ModelCheckpoint(filepath=path_cp,
                             monitor=train_conf.monitor_type,
                             save_best_only=True)

        # fit the model
        history = model.fit(x_train,
                            y_train,
                            batch_size=train_conf.batch_size,
                            epochs=train_conf.epochs,
                            verbose=train_conf.verbose,
                            validation_data=(x_valid, y_valid),
                            callbacks=[cp, es])

        # save training history
        save_on_condition(train_conf.is_log_history, path_prefix + '.his',
                          history.history)
        # save the trained model
        model.save(path_prefix + '.h5')
        # save the training meta data, e.g., TrainConf, vocab2id, label2id
        save_pkl_data(path_prefix + '.meta', {
            'ModelConf': model_conf,
            'vocab2id': vocab2id,
            'label2id': label2id
        })

        # test if test_ratio > 0
        if test_ratio > 0:
            # predict with trained model
            model.load_weights(path_cp)
            y_predict = model.predict(x_test)
            y_true = y_test.tolist()

            # save prediction
            if train_conf.is_log_prediction:
                path_predict = path_prefix + '.predictions'
                save_pkl_data(path_predict, [y_predict, y_test])

            # evaluate
            scores_current = metrics.evaluate(y_true, y_predict,
                                              predict_threshold)
            metrics.display_scores(scores_current, label_names)
            scores.append(scores_current)

        # prepare for the next loop
        if train_conf.is_data_refresh:
            data_train, data_valid, data_test = prepare.prepare_data(
                data, model_conf.img_shape, model_conf.img_re_sample,
                model_conf.text_length, label_size, train_ratio, valid_ratio)
            (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = \
                data_train, data_valid, data_test

    if test_ratio > 0 and len(scores) > 0:
        # average score
        avg_scores = metrics.compute_mean_var(scores)
        metrics.display_average_scores(avg_scores, label_names,
                                       train_conf.repeat_times)

        # store average score
        if train_conf.is_log_avg_score:
            path_avg = os.path.join(
                train_conf.path_output,
                'result_{}.avg.txt'.format(train_conf.code_name))
            with codecs.open(path_avg, mode='w', encoding='UTF-8') as fo:
                metrics.display_average_scores(avg_scores,
                                               label_names,
                                               train_conf.repeat_times,
                                               is_k_print=True,
                                               fo=fo)
Example #12
'''
@Email   : [email protected]
@File    : main.py
'''

import cv2
from SVM import SVM
from prepare import prepare_data

cell_class = {11:'EOSINOPHIL',
              22:'LYMPHOCYTE',
              33:'MONOCYTE',
              44:'NEUTROPHIL'}

types = ['hog', 'gray', 'rgb', 'hsv']
feature_type = types[3]

img_path = './data/test_data/LYMPHOCYTE/_0_1050.jpeg'  # adjust the test image
img = cv2.imread(img_path)
cv2.putText(img, 'LYMPHOCYTE', (23, 45), cv2.FONT_HERSHEY_COMPLEX, 0.6, (0, 0, 255), 1)
cv2.imshow('Result', img)
cv2.waitKey()

svm = SVM()
data = prepare_data(feature_type)
print('data:', data)

svm.train(data)
img = cv2.imread(img_path)
ID_num = svm.predict(img, feature_type)

Example #13
import tensorflow as tf
import random
from prepare import prepare_data, get_batch_data


class_num = 2
learning_rate = 0.0005
training_epochs = 20
batch_size = 20
width = 128
height = 128
data = prepare_data(class_num)

keep_prob = tf.placeholder(tf.float32)
input_image = tf.placeholder(tf.float32, [None, width, height, 3])
label = tf.placeholder(tf.float32, [None, class_num])

filters = {
    'cf1': tf.Variable(tf.random_normal([3, 3, 3, 32], stddev=0.01)),
    'cf2': tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01)),
    'cf3': tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01)),
    'cf4': tf.Variable(tf.random_normal([3, 3, 128, 256], stddev=0.01))
}

cl1 = tf.nn.conv2d(input_image, filters['cf1'], strides=[1, 1, 1, 1], padding='SAME')
cl1 = tf.nn.max_pool(cl1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
#cl1 : [-1, width / 2, height / 2, 32]

cl2 = tf.nn.conv2d(cl1, filters['cf2'], strides=[1, 1, 1, 1], padding='SAME')
cl2 = tf.nn.relu(cl2)
cl2 = tf.nn.max_pool(cl2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
Example #14
import pandas as pd
import matplotlib.pyplot as plt
import acquire
import prepare
import math
from sklearn import metrics

df = acquire.acquire_data()
df = prepare.prepare_data(df)


def split_store_data(df, train_prop=.7):
    train_size = int(len(df) * train_prop)
    train, test = df[0:train_size], df[train_size:len(df)]
    return train, test


train, test = split_store_data(df)
target_vars = ['steps']
yhat = pd.DataFrame(test[target_vars])


def evaluate(target_var, train=train, test=test, output=True):
    mse = metrics.mean_squared_error(test[target_var], yhat[target_var])
    rmse = math.sqrt(mse)

    if output:
        print('MSE:  {}'.format(mse))
        print('RMSE: {}'.format(rmse))
    else:
        return mse, rmse
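As written, yhat is initialised from the test targets themselves, so evaluate() would report an error of zero until yhat is overwritten with actual forecasts. A hypothetical usage with a naive last-value baseline for the 'steps' target (the baseline is illustrative only):

yhat['steps'] = train['steps'].iloc[-1]        # repeat the last observed training value
evaluate('steps')                              # prints MSE and RMSE for the baseline
mse, rmse = evaluate('steps', output=False)    # or return the numbers instead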
Example #15
            print(said)
        except Exception as e:
            print("Exception: " + str(e))

    return said


tags = []                 # all the different tags
all_questions_list = []   # the questions, with their words tokenized
questions_tags = []       # the tag for each question in the list above
all_question_words = []   # every word across all questions in the dataset

pr = prepare_data(data)
all_question_words, tags, all_questions_list, questions_tags = pr.prepare(
    data, "intents", "all_questions", "tag")

all_questions_train = []
tags_output = []

all_questions_train, tags_output = pr.get_training_set()
all_questions_train = np.array(all_questions_train)
tags_output = np.array(tags_output)

tf.reset_default_graph()
model = create_model(all_questions_train, tags_output, tags,
                     all_question_words)
model.fit_model(all_questions_train, tags_output)
Example #16
def prepare(args, config):
    logger = logging.getLogger('BugLoc')
    logger.info('Preparing data ...')
    generate_ast(args, config)
    prepare_data(args, config)
    logger.info('Done preparing data...')