Example #1
def predict_dir(caffemodel, deploy, file_list, IMAGE_SIZE=227, LAYER_NAME="my-fc8", mode=True):
    images = np.zeros((len(file_list), 3, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float)

    # read age list
    real_ages = []
    for index, dicom_file in enumerate(file_list):
        print dicom_file
        real_age = info.getInfo(dicom_file)
        real_ages.append(real_age)
        images[index, :, :, :] = preprocess.process(dicom_file, IMAGE_SIZE=IMAGE_SIZE)
    if mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    net = caffe.Net(deploy, caffemodel, caffe.TEST)
    net.blobs['data'].reshape(len(file_list), 3, IMAGE_SIZE, IMAGE_SIZE)
    net.blobs['data'].data[...] = images
    output = net.forward()
    MAE_SUM = 0.0
    for index, result in enumerate(output[LAYER_NAME]):
        predict_age = result[0]
        real_age = real_ages[index]
        # accumulate the absolute error between prediction and ground truth
        MAE_SUM += abs(predict_age-real_age)
        print(abs(predict_age-real_age))
    print MAE_SUM
    return MAE_SUM/len(file_list)
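A minimal call sketch for the function above; the model, deploy, and DICOM paths are hypothetical placeholders:

# hypothetical paths; the function returns the mean absolute error over the file list
mae = predict_dir("age_net.caffemodel", "deploy.prototxt",
                  ["scan_001.dcm", "scan_002.dcm"], IMAGE_SIZE=227, mode=True)
print("MAE:", mae)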
Example #2
def preproc():
    sym = txt.get()
    if sym != "":
        pre.process(sym)
        print("preprocess")
        tm.showinfo("Input", "Preprocess Successfully Finished")
    else:
        tm.showinfo("Input error", "Select Dataset")
Example #3
def preprocess():
    sym = txt.get()
    res = ""
    message.configure(text=res)
    if sym != "":
        pre.process(sym)
        print("preprocess")
        tm.showinfo("Input", "Preprocess Successfully Finished")
    else:
        tm.showinfo("Input error", "Select Dataset")
Example #4
def main():
    model = pickle.load(open('models/LogRegression_thre1', 'rb'))
    # provide your filename here
    process(filename='your_file_name.csv')
    datadf = pd.read_csv(FILENAME)
    datadf = datadf.drop(datadf.columns[[0]],axis=1)
    datadf = (datadf-datadf.mean())/(datadf.max()-datadf.min())
    X = np.array(datadf)
    predictions = model.predict(X)
    # consider the predicted rating to be within a range of +/- 1;
    # for example, if the prediction is 7, the true rating may be between 6 and 8
    print predictions
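The ±1 tolerance mentioned in the comments can be checked explicitly when ground-truth ratings are available; a minimal sketch, where load_true_ratings is a hypothetical helper returning an array aligned with predictions:

import numpy as np

true_ratings = load_true_ratings()  # hypothetical helper, aligned with `predictions`
within_one = np.abs(np.asarray(predictions) - np.asarray(true_ratings)) <= 1
print("fraction within +/-1:", within_one.mean())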
Example #5
def data_write_disk():
    test = P.process()

    #return test
    test = map(lambda t: ', '.join(str(x) for x in t), test)
    test = map(lambda t: t+"\n", test)
    #t = ", ".join(test)
    #print "precision: ", test[0]
    #print "recall : ", test[1]
    #print "f1 : ", test[2]

    #print test[0]
    #print test[1]
    #print test[2]

    ##print NP.mean(test[0])
    ##print NP.mean(test[1])
    ##print NP.mean(test[2])
    #print "precision:", NP.mean(test[0])
    #print "recall:", NP.mean(test[1])
    #print "f1:", NP.mean(test[2])
    with open("datasample.csv", "a") as f:
        f.writelines(test)

    GT.dump_call_bb_list()
Example #6
def genModel(artist, song, model, embedDim, interval, distance):
    XTrain, yTrain, XPredict, yTest, mean, var = pp.process(artist, song, embedDim, interval, distance)
    yPredict = model.train(XTrain, yTrain, XPredict)
    yTest = yTest * var + mean
    yPredict = yPredict * var + mean
    yPredict[yPredict < 0] = 0  # clip negative predictions straight to zero
    return yPredict, yTest
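Since both returned series are de-normalised with the same mean and variance, they are directly comparable; a small evaluation sketch, assuming NumPy arrays are returned and using hypothetical argument values:

import numpy as np

# hypothetical arguments; `model` is any object with the train() interface used above
yPredict, yTest = genModel("some artist", "some song", model, embedDim=3, interval=1, distance=1)
mae = np.mean(np.abs(yPredict - yTest))
print("MAE in original units:", mae)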
Example #7
def generateHdf5_fromfilelist(source_list, target):
    h5_file = hy.File(target, 'w')
    file_list = []
    for source in source_list:
        for root, dirs, files in os.walk(source):
            for dicom_file in files:
                file_list.append(os.path.join(root, dicom_file))
    random.shuffle(file_list)
    random.shuffle(file_list)
    random.shuffle(file_list)
    # change the image size to the one you want
    IMAGE_SIZE = 224
    data = np.zeros((len(file_list), 3, IMAGE_SIZE, IMAGE_SIZE))
    labels = np.zeros(len(file_list), dtype=np.float32)
    for index, dicom_file in enumerate(file_list):
        age = info.getInfo(dicom_file)
        im = preprocess.process(dicom_file, IMAGE_SIZE=IMAGE_SIZE)
        data[index, :, :, :] = im
        labels[index] = age
        print(dicom_file, age)
    h5_file['data'] = data
    h5_file['label'] = labels

    print(labels)
    h5_file.close()
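To sanity-check a file produced by the function above, it can be reopened with the same handle style; a sketch assuming `hy` is the usual h5py import and 'train.h5' is a hypothetical output path:

with hy.File('train.h5', 'r') as f:
    data = f['data'][...]
    labels = f['label'][...]
    print(data.shape, labels.shape)  # expect (N, 3, 224, 224) and (N,)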
Example #8
def dump_distribution():
    test = P.process()
    #print test

    bl = GT.dump_call_bb_list()
    #print bl
    
    t = []
    for k, v in test.items():
        #if int(k) in bl:
        if True:
            #print k, v
            t.append(v)


    #t1 = map(lambda e: (e[0], e[1], e[2]*e[3]*e[4]), t)
    t1 = map(lambda e: (e[0], e[1], e[2], e[3], e[4]), t)

    #print t1
    k = len(t1[0])
    kk = list(product(range(2), repeat=k))

    res = [0] * len(kk)

    for t11 in t1:
        i = kk.index(t11)
        #print i, t11
        res[i] += 1

    res = map(lambda i: float(i)/len(t1), res)

    print "distribution:", res
    print "length:", len(t1)
Example #9
async def analyze():
    result = await request.body
    result = json.loads(result.decode("utf-8"))
    if result["tweets"]:
        before = time()
        result["vaildAccount"] = True
        merged_tweets = process(result["tweets"])
        result["year"], result["month"] = year_month(result["info"])
        result["monthGraph"] = month_graph(result["tweets"])
        result["hourGraph"] = hour_graph(merged_tweets)
        result["dayGraph"] = day_graph(merged_tweets)
        result["dayTimeGraph"] = day_time_graph(result["tweets"])
        result["region"], result["regionProba"] = predict_region(
            merged_tweets, xgb_region)
        result["country"], result["countryProba"] = predict_country(
            merged_tweets, xgb_country, tfidf_vectorizer)
        result["wordcloud"] = wordcloud(merged_tweets)
        result["sentimentGraph"] = sentiment_graph(merged_tweets,
                                                   result["tweets"])
        result["hmuGraph"] = hmu_graph(result["tweets"])
        result["hashtagGraph"] = hashtag_graph(result["tweets"])
        result["logScale"] = log_scale(result["tweets"])
        result["mentions"] = mentions(result["tweets"])
        result["urls"] = urls(result["tweets"])
        result["took"] = f"{time()-before} seconds"
        print(result["took"])
        return jsonify(result)
    else:
        result["vaildAccount"] = False
        return jsonify(result)
Example #10
def predict(net, comments, sequence_length=50, train_on_gpu=False):

    net.eval()
    #preprocess:
    cleaned_comments = prepros(comments)
    #print(cleaned_comments)
    #process:
    features = process(cleaned_comments)
    #print(features)
    feature_tensor = torch.from_numpy(features)
    feature_tensor = feature_tensor.type(torch.LongTensor)
    batch_size = feature_tensor.size(0)
    #print(feature_tensor.size(0))
    # initialize hidden state
    h = net.init_hidden(batch_size)

    if (train_on_gpu):
        feature_tensor = feature_tensor.cuda()

    # get the output from the model
    output, h = net(feature_tensor, h)
    #print(output.squeeze())
    # convert output probabilities to prediction
    pred = torch.argmax(output, dim=1)
    # printing output value, before rounding
    #print(pred)
    return pred, output, cleaned_comments
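A minimal usage sketch for the function above, assuming `net` is an already-trained model exposing init_hidden and that prepros/process perform the cleaning and encoding shown:

comments = ["great movie, loved it", "terrible plot and acting"]
pred, output, cleaned = predict(net, comments, train_on_gpu=torch.cuda.is_available())
print(pred.tolist())  # one predicted class index per comment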
Example #11
def output_features_of_dataset(source,
                               caffemodel,
                               deploy_file,
                               IMAGE_SIZE=227,
                               gpu_mode=True,
                               LAST_LAYER_NAME="ip1",
                               batch_size=240,
                               save_file_path="./features.txt"):
    if gpu_mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    net = caffe.Net(deploy_file, caffemodel, caffe.TEST)
    samples = []
    for dir_name in os.listdir(source):
        one_person_dir = os.path.join(source, dir_name)
        for file_name in os.listdir(one_person_dir):
            one_person_pic_path = os.path.join(one_person_dir, file_name)
            samples.append((dir_name, file_name, one_person_pic_path))
    data = np.zeros((batch_size, 3, IMAGE_SIZE, IMAGE_SIZE))
    with open(save_file_path, "w") as f:
        for index, sample in enumerate(samples):
            t = index % batch_size
            data[t, :, :, :] = preprocess.process(sample[2], IMAGE_SIZE)
            # flush a full batch through the network once the buffer is filled
            if t == batch_size - 1:
                net.blobs['data'].data[...] = data
                output = net.forward()
                features = output[LAST_LAYER_NAME]
                lines = [
                    "%s %s %s\n" % (s[0][0], s[0][1],
                                    " ".join(str(v) for v in s[1]))
                    for s in zip(samples[index - batch_size + 1:index + 1],
                                 features)
                ]
                f.writelines(lines)
Example #12
def read_files(file):
    with open(file) as f:
        full_lines = ''
        for i in f.readlines():
            full_lines+=i
    file_result = json.loads(json.dumps(process(full_lines)))
    return file_result
Example #13
def main_op():
    review_spirit = w.get('1.0', END)
    demo = process(review_spirit)
    demo1 = create_word_features(demo)
    demo2 = ('Review : ' + clf.classify(demo1))
    l2 = Label(bottom_frame, text=demo2)
    l2.pack()
Example #14
def get_trainval_data(batch_size, train_percent, num_workers=1, data_dir='../data/', num_dir=1, IM_SIZE=(160, 160),
                      target_im_size=(128, 128), threshold=0.5, transform=True, no_cut_select=0.4):
    wear_cut, no_wear_cut, _, _ = process(crop_size=IM_SIZE[0], data_dir=data_dir, num_dir=num_dir, threshold=threshold)
    images, labels = Imdb(wear_cut, no_wear_cut, no_cut_select=no_cut_select)

    train_idx = random.sample(range(0, len(images)), int(len(images) * train_percent))

    mask = np.zeros(len(images), dtype=bool)
    mask[train_idx] = True

    train_images = np.asarray(images)[mask]
    train_labels = np.asarray(labels)[mask]

    val_images = np.asarray(images)[~mask]
    val_labels = np.asarray(labels)[~mask]

    train_dataset = DatasetTrainVal(train_images, train_labels, target_im_size=target_im_size,
                                    transform=transform)  # TODO: initialise
    val_dataset = DatasetTrainVal(val_images, val_labels, target_im_size=target_im_size,
                                  transform=transform)  # TODO: initialise

    train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True,
                                       num_workers=num_workers)
    val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, drop_last=True,
                                     num_workers=num_workers)

    return train_dataloader, val_dataloader
Example #15
def genModel(artist, song, model, embedDim, interval, distance):
    XTrain, yTrain, XPredict, yTest, mean, var = pp.process(artist, song, embedDim, interval, distance)
    yPredict = model.train(XTrain, yTrain, XPredict)
    yTest = yTest * var + mean
    yPredict = yPredict * var + mean
    yPredict[yPredict < 0] = 0  # clip negative predictions straight to zero
    return yPredict, yTest
Example #16
def __collect(json_file_path, source_filenames_path, dump_path):
    function_tuples = []
    with open(json_file_path, 'r') as f1, open(source_filenames_path, 'r') as f2:
        for json_idx, json_tree_str in enumerate(tqdm(f1.readlines())):
            if json_idx > 50:  # TEST
                print('Force Early Stopping.')
                break
            json_tree = json.loads(json_tree_str)
            source_code_file = self.py150_source_dir + f2.readline()[:-1]  # remove '\n'
            if len(json_tree) == 0:
                continue
            try:
                with open(source_code_file, 'r') as f:
                    tree = ast.parse(f.read())
                function_dict = process(json_idx, json_tree, tree)
                for functionItem in function_dict.values():
                    if functionItem.node_idx >= 0:
                        token_seq = filter_tokens(functionItem.raw_token_seq)
                        function_tuples.append(
                            (functionItem.functionName,
                             functionItem.json_idx,
                             functionItem.node_idx, token_seq))
            except IOError:  # no FileNotFoundError in py2
                print("Early stopping of preprocessing")
                break
    with open(dump_path, 'wb') as f:
        pickle.dump(function_tuples, f)
Example #17
def make(sourcefile, modulename):
  import cleaner, preprocess
  if not os.access(sourcefile, os.F_OK):
    raise IOError(sourcefile)
  #sourcefile
  basename = os.path.basename(sourcefile)
  preprocessed = "%s.c"%(modulename)
  cleaned = "%s_clean.c"%(modulename)
  #xml = "%s.xml"%(modulename)
  pyfinal = "%s.py"%(modulename)
  if not os.access(pyfinal, os.F_OK):
    if not os.access(cleaned, os.F_OK):
      if not os.access(preprocessed, os.F_OK):
        # preprocess the file
        if preprocess.process(sourcefile, preprocessed) > 0:
          return
      log.info('PREPROCESS - OK')
      # clean it
      if cleaner.clean(preprocessed, cleaned) > 0:
        return
    log.info('CLEAN - OK')
    # generate pyfinal
    if gen(cleaned, modulename) > 0:
      return
  log.info('PYFINAL - OK')
  __import__(modulename)
  import inspect
  nbClass = len(inspect.getmembers(sys.modules[modulename], inspect.isclass))
  nbMembers = len(inspect.getmembers(sys.modules[modulename]))
  log.info("module %s has %d members for %d class"%(modulename, nbMembers, nbClass))
Example #18
def main_op():
    review_spirit = w.get('1.0', END)
    demo = process(review_spirit)

    demo1 = creation_list_mots(demo)
    demo2 = ('sentiment est ' + clf.classify(demo1))
    l2 = Label(bottom_frame, text=demo2)
    l2.pack()
Example #19
    def test_one(self):

        # All will be removed, except the first column in both alignments
        msa_a = TabularMSA([Protein('DL-'), Protein('KL-'), Protein('DL-')])
        msa_b = TabularMSA([Protein('KT-'), Protein('DT-'), Protein('KT-')])
        contact_mtx = np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]])
        exp_contact_mtx = np.array([[1]])
        gap_threshold = 0.5
        num_mtx_a, bin_mtx_a, gappy_idxs_a, constant_idxs_a = preprocess.process(
            msa_a, gap_threshold, AA_TABLE)
        num_mtx_b, bin_mtx_b, gappy_idxs_b, constant_idxs_b = preprocess.process(
            msa_b, gap_threshold, AA_TABLE)
        proc_contact_mtx = preprocess.process_contact_mtx(
            contact_mtx, gappy_idxs_a, constant_idxs_a, gappy_idxs_b,
            constant_idxs_b)
        print(proc_contact_mtx)
        print(exp_contact_mtx)
        assert np.array_equal(proc_contact_mtx, exp_contact_mtx)
Example #20
def save_valid_midis(url, directory="midis"):
    valid_instruments = ["piano", "harpsichord"]
    links = get_all_links(url)
    prev_midis = [get_first_notes(x) for x in get_midis_in_directory("midis")]

    os.mkdir("temp")
    for link in list(links):
        name = re.findall("/[^/]+", link)[-1][1:]
        urlretrieve(link, "temp.mid")

        midi = mido.MidiFile("temp.mid")

        if len(midi.tracks) > 3:
            continue

        valid = True
        for track in midi.tracks[1:]:
            valid = valid and any(
                instrument in track.name.lower()
                for instrument in valid_instruments)

        if not valid:
            continue

        notes = get_first_notes(midi)
        for notes1 in prev_midis:
            if check_two_midis_similar(notes1, notes):
                valid = False
                break

        if not valid:
            continue

        prev_midis.append(notes)

        midi.save(filename="temp" + "/" + name)

    process("temp")

    for file in os.listdir("temp"):
        shutil.copy("temp/" + file, "midis/" + file)

    shutil.rmtree("temp")
    print("downloaded all")
Example #21
def train(data, targets, filenames):
    targets = [val == "INFEC" for val in targets] # Set INFEC as positive val
   
    # Choose training mode
    options = ["Cross validation", "Build and test model"]
    res = ui.prompt(options=options)
    mode = options[int(res)]

    # Choose ML algorithm
    options = ["Support Vector Machine", "Random Forest",
            "Decision Tree Classifier", "KNN"]
    res = ui.prompt("Choose a ML algorithm:", options)
    switch = {
        0: svm.SVC(C=100., random_state=0),
        1: RandomForestClassifier(n_estimators=50, max_depth=None, random_state=0),
        2: DecisionTreeClassifier(random_state=0),
        3: KNeighborsClassifier()
    }
    clf = switch.get(int(res))

    if mode == "Cross validation":
        model_evaluation(data, targets, clf)
    elif mode == "Build and test model":
        # Train model
        clf.fit(data, targets)

        # Get test dir
        while True:
            dirname = ui.prompt("Which directory are the test files in?")
            if os.path.isdir(dirname):
                break
            print("ERROR: Directory not found.")

        # Set up data/targets for test model
        print("\n************************************")
        print("*  PREPARING MODEL FOR EVALUATION  *")
        print("************************************")

        pageNames, y_true, filenames = pproc.process(dirname)    
        y_true = [val == "INFEC" for val in y_true] # Set INFEC as positive val
        test_data = ft.features(pageNames)
   
        y_pred = clf.predict(test_data)

        save_filenames(y_true, y_pred, filenames)
    
        conf_matrix = skm.confusion_matrix(y_true, y_pred)
        accuracy = skm.accuracy_score(y_true, y_pred)
        precision = skm.precision_score(y_true, y_pred, average=None)
        recall = skm.recall_score(y_true, y_pred, average=None)
        f1 = skm.f1_score(y_true, y_pred, average=None)
        print("\n{}".format(conf_matrix))
        print("Accuracy:  {}".format(accuracy))
        print("Precision: {}".format(precision[1]))
        print("Recall:    {}".format(recall[1]))
        print("F1:        {}".format(f1[1]))
Example #22
def extract_names(cv_dir, word_limit):
    extracted_names = {}
    for file in os.listdir(cv_dir):
        ext = os.path.splitext(file)[1]
        if ext in ('.pdf', '.doc', '.docx'):
            text = convert2txt.extract_text(cv_dir + file, ext)
        elif ext == '.txt':
            with open(cv_dir + file, encoding='utf-8') as f:
                text = f.read()
        else:
            continue
        # keep only the first word_limit words, normalise, and run NER
        words = text.split()
        text = ' '.join(words[:word_limit])
        text = preprocess.process(text)
        nlp_text = nlp(text)
        extracted_names[file] = [e.text for e in nlp_text.ents]
    return extracted_names
Example #23
def read_data(path, n_bin=3):
    prob_name = path.split('/')[-1]
    datafile = path + '/' + prob_name + '.data'
    # data = np.loadtxt(datafile, delimiter=',', dtype=str)
    data = parse_c45(prob_name, path)
    data = np.asarray(data.to_float())
    # print(data)
    X = data[:, 1:-1]
    X = process(X, prob_name, n_bin)
    y = data[:, -1].astype(int)
    return X, y
Example #24
    def predict(self, image_path):
        '''
        Run the model and return its prediction.
        :param input: evaluation sample passed in, e.g. {"image_path":"image\/172691.jpg"}
        :return: result returned to the system on success, e.g. {"label":"ZASSEOR"}
        '''

        pred = ''
        for n in range(len(self.model_list)):
            model = self.model_list[n]
            model = model.to(device)
            # img = Image.open(image_path).convert("RGB")
            srcimg = cv2.imread(image_path, 1)
            img = cv2.cvtColor(srcimg, cv2.COLOR_BGR2GRAY)
            # segment the handwritten characters
            roi, roicon = process(img)  # [row_up, row_down], [(), (), ...()]
            if roi is None:
                return {"label": "None"}

            roi_imgs = get_roi_img(srcimg, roi, roicon, gap=2)
            if len(roi_imgs) == 0:
                return {"label": "None"}
            # split roi_imgs
            roi_imgs = roiimg_split(roi_imgs)

            big_indx = None
            big_indx = find_space(roicon)

            # check roi_imgs for spaces
            for i in range(len(roi_imgs)):
                # cv2.imshow('' ,cv2.resize(roi_imgs[i], (40, 56), cv2.INTER_LANCZOS4))
                # cv2.waitKey()
                roi_imgs[i] = torch.FloatTensor(
                    cv2.resize(roi_imgs[i], (40, 56), cv2.INTER_LANCZOS4))
            inputs = torch.stack(roi_imgs, dim=0).permute(0, 3, 1,
                                                          2).to(device)

            inputs = inputs.to(device)
            output = model(inputs)
            if output.ndim == 0:
                return {"label": "None"}
            predict = torch.max(output, 1)[1]
            for i in range(predict.shape[0]):
                if predict[i] < 26:
                    pred += chr(predict[i].item() + 65)
                elif predict[i] == 26:
                    pred += '-'
                elif predict[i] == 27:
                    pred += "'"
            if big_indx is not None:
                l_pred = list(pred)
                l_pred.insert(big_indx, ' ')
                pred = ''.join(l_pred)
        return {"label": pred}
Example #25
def predict(caffemodel, deploy, dicom_file, IMAGE_SIZE=227, LAYER_NAME="my-fc8"):
    age = info.getInfo(dicom_file)
    im = preprocess.process(dicom_file, IMAGE_SIZE=IMAGE_SIZE)
    caffe.set_mode_gpu()
    net = caffe.Net(deploy, caffemodel, caffe.TEST)
    net.blobs['data'].reshape(1, 3, IMAGE_SIZE, IMAGE_SIZE)
    # read a dicom file
    net.blobs['data'].data[...] = im
    output = net.forward()
    predict_age = output[LAYER_NAME][0][0]
    # return age, predict_age
    print("%s predict: %s real: %s" % (dicom_file, predict_age, age))
Example #26
def get_pred():
    global image
    image = get_img(master, w)
    image = np.asarray(image)[:,:,0]
    image = preprocess.process(image)
    image = np.expand_dims(image, axis=0)
    image = np.expand_dims(image, axis=3)
    pred = classifier.predict(image)[0]
    pred = class_indices[np.argmax(pred)]
    print_pred(pred)
    w.delete('all')
    master.after(10000, get_pred)
Example #27
def get_formatted_data(train=True):
    """
    """

    data = None

    if (train):
        data = get_data()
    else:
        data = get_test_data()

    data['text1_processed'] = data.text1.apply(lambda x: pp.process(x))
    data['text2_processed'] = data.text2.apply(lambda x: pp.process(x))

    ## get the tokens
    ## https://keras.io/preprocessing/text/#tokenizer
    ## https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html

    list1 = list(data.text1.values.astype(str))
    list2 = list(data.text2.values.astype(str))

    # Preprocessed text
    # Experiment - with stopwords and lemmatization - Probably not a good idea
    # list1 = list(data.text1_processed.values.astype(str))
    # list2 = list(data.text2_processed.values.astype(str))
    all_questions = list1 + list2

    tokenizer.fit_on_texts(all_questions)

    maximum_length_of_question = 40
    sequences_text1 = tokenizer.texts_to_sequences(data.text1.values)
    sequences_text1 = sequence.pad_sequences(sequences_text1,
                                             maxlen=maximum_length_of_question)

    sequences_text2 = tokenizer.texts_to_sequences(
        data.text2.values.astype(str))
    sequences_text2 = sequence.pad_sequences(sequences_text2,
                                             maxlen=maximum_length_of_question)

    return [sequences_text1, sequences_text2]
Example #28
def generate_siamese_lmdb(source, target, IMAGE_SIZE=227):
    env = lmdb.Environment(target, map_size=int(1e12), writemap=True)
    dataset = generate_siamese_dataset(source, totals=250000)
    _same = dataset[0]
    _diff = dataset[1]
    random.shuffle(_same)
    random.shuffle(_diff)
    for x in _same:
        x.append(1)
    for x in _diff:
        x.append(0)
    samples = []
    samples.extend(_same)
    samples.extend(_diff)
    # print samples
    random.shuffle(samples)
    random.shuffle(samples)
    random.shuffle(samples)
    # print len(samples)
    # print samples
    with env.begin(write=True) as txn:
        datum = caffe.proto.caffe_pb2.Datum()
        dimension = 3
        datum.channels = dimension
        datum.height = IMAGE_SIZE
        datum.width = IMAGE_SIZE
        sample = np.zeros((2*dimension, IMAGE_SIZE, IMAGE_SIZE))
        index = 0
        for one_sample in samples:
            print index, one_sample
            label = one_sample[-1]
            sample[:dimension, :, :] = preprocess.process(one_sample[0], IMAGE_SIZE)
            sample[dimension:, :, :] = preprocess.process(one_sample[1], IMAGE_SIZE)
            datum.data = sample.tobytes()
            datum.label = label
            str_id = "%8d" % index
            txn.put(str_id, datum.SerializeToString())
            index = index + 1
Example #29
def get_test_data(data_dir='../test/', num_dir=1, IM_SIZE=(512, 512), threshold=0.5):
    _, _, preprocessed_cutouts, original_img_shape = process(crop_size=IM_SIZE[0], data_dir=data_dir, num_dir=num_dir,
                                                             threshold=threshold)  # preprocessed cutouts contains list of list of cutouts per image

    prep_np = []
    for folder_no in range(len(preprocessed_cutouts)):
        for crop_no in range(len(preprocessed_cutouts[folder_no])):
            preprocessed_cutouts[folder_no][crop_no][0] = \
                preprocessed_cutouts[folder_no][crop_no][0].transpose([2, 0, 1])
        cutout_np = np.asarray([pr[0] for pr in preprocessed_cutouts[folder_no]])

        prep_np.append(cutout_np)

    return prep_np, preprocessed_cutouts, original_img_shape
Example #30
def main():
    input_file = 'zh_wiki_00'
    output_file = 'zh_words'

    in_file = open(input_file, 'r')
    out_file = open(output_file, 'w+')

    for line in in_file.readlines():
        out = process(line)
        if len(out):
            out_file.write(str(out))
            out_file.write('\n')

    in_file.close()
    out_file.close()
Example #31
def predict_dir(caffemodel, deploy, source_list, IMAGE_SIZE=227, LAYER_NAME="my-fc8", mode=True, BORDER_AGE=18):
    # f = open("predict.log", "w")
    file_list = []
    correct_num = 0
    for source in source_list:
        for root, dirs, files in os.walk(source):
            for file in files:
                file_list.append(os.path.join(root, file))
                # f.write(str(real_age)+" "+str(predict_age)+'\n')
    # f.close()
    images = np.zeros((len(file_list), 3, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float)

    # read age list
    real_ages = []
    for index, dicom_file in enumerate(file_list):
        print dicom_file
        real_age = info.getInfo(dicom_file)
        real_ages.append(real_age)
        images[index, :, :, :] = preprocess.process(dicom_file, IMAGE_SIZE=IMAGE_SIZE)
        # if abs(predict_age - real_age)<=3:
        #     correct_num = correct_num+1
    if mode:
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()
    net = caffe.Net(deploy, caffemodel, caffe.TEST)
    results = []
    for x in range(0, len(file_list), 10):
        # net.blobs['data'].reshape(100, 3, IMAGE_SIZE, IMAGE_SIZE)
        if len(file_list) - x < 10:
            net.blobs['data'].reshape(len(file_list) - x, 3, IMAGE_SIZE, IMAGE_SIZE)
            net.blobs['data'].data[...] = images[x:]
        else:
            net.blobs['data'].reshape(10, 3, IMAGE_SIZE, IMAGE_SIZE)
            net.blobs['data'].data[...] = images[x:x+10]
        o = net.forward()
        # extend with a copy of this batch's predictions; merging the dicts
        # would overwrite earlier batches under the same layer name
        results.extend(o[LAYER_NAME].copy())
    for index, result in enumerate(results):
        predict_age = result[0]
        real_age = real_ages[index]
        # the condition under which the prediction counts as correct
        # if abs(predict_age - real_age) <= 3:
        #     correct_num = correct_num+1
        if (predict_age > BORDER_AGE and real_age > BORDER_AGE) or (predict_age <= BORDER_AGE and real_age <= BORDER_AGE):
            correct_num = correct_num+1
    return float(correct_num)/len(file_list)
Example #32
    def test_process_1(self):
        aln = TabularMSA([Protein('AL-'), Protein('VL-'), Protein('MLA')])
        gap_thr = 0.5

        exp_num = [[AA_TABLE['A']], [AA_TABLE['V']], [AA_TABLE['M']]]
        exp_bin = [
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        ]

        num_mtx, bin_mtx, gappy_idxs, constant_idxs = preprocess.process(
            aln, gap_thr, AA_TABLE)
        assert np.array_equal(exp_num, num_mtx)
        assert np.array_equal(exp_bin, bin_mtx)
        assert gappy_idxs == [2]
        assert constant_idxs == [1]
Example #33
def post_img():
    result = {}
    try:
        f = open('img.jpg', 'wb')
        f.write(request.get_data())
        f.close()

        roi = preprocess.process('img.jpg')
        cv2.imwrite('cropped.jpg', roi)

        image_data = tf.gfile.FastGFile('cropped.jpg', 'rb').read()

        label_lines = [line.rstrip() for line in tf.gfile.GFile("/tf_files/retrained_labels.txt")]

        with tf.gfile.FastGFile("/tf_files/retrained_graph.pb", 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            _ = tf.import_graph_def(graph_def, name='')

        with tf.Session() as sess:
            softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')

            predictions = sess.run(softmax_tensor, {'DecodeJpeg/contents:0': image_data})

            top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
            best_guess = top_k[0]

            guesses = []
            result['error'] = False
            result['guesses'] = []
            for node_id in top_k:
                human_string = label_lines[node_id]
                score = predictions[0][node_id]
                result['guesses'].append({
                    'name': human_string,
                    'confidence': "%0.5f" % score
                })

    except Exception as e:
        result = {
            "error" : True,
            "guesses" : []
        }
        app.logger.warning(e)

    return json.dumps(result) + "\n"
Example #34
def make(sourcefile, modulename, target=False):
  ''' using gccxml directly distort ctypeslib performances
  but on some libraries, we don't have a choice.
  '''
  if not os.access(sourcefile, os.F_OK):
    raise IOError(sourcefile)
  #sourcefile
  basename = os.path.basename(sourcefile)
  preprocessed = "%s.c"%(modulename)
  cleaned = "%s_clean.c"%(modulename)
  xml = "%s.xml"%(modulename)
  pyfinal = "%s.py"%(modulename)
  if target:
    gen2(sourcefile, modulename, target)
    log.info('PYFINAL - OK')
  else:
    if not os.access(pyfinal, os.F_OK):
      if not os.access(cleaned, os.F_OK):
        if not os.access(preprocessed, os.F_OK):
          # preprocess the file
          if preprocess.process(sourcefile, preprocessed) > 0:
            return
        log.info('PREPROCESS - OK')
        # clean it
        if cleaner.clean(preprocessed, cleaned) > 0:
          return
      log.info('CLEAN - OK')
      # generate pyfinal
      if gen(cleaned, modulename) > 0:
        return
    log.info('PYFINAL - OK')
  __import__(modulename)
  import inspect
  nbClass = len(inspect.getmembers(sys.modules[modulename], inspect.isclass))
  nbMembers = len(inspect.getmembers(sys.modules[modulename]))
  log.info("module %s has %d members for %d class"%(modulename, nbMembers, nbClass))
Example #35
def data():
    gd = GT.obtain_gd()

    test = P.process()

    return (gd, test)
Example #36
def processText(docContent):
    tokens = process(docContent)
    tokenFrequency = defaultdict(int)
    for token in tokens:
        tokenFrequency[token] += 1
    return set(tokens), tokenFrequency
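A tiny usage sketch, assuming `process` tokenises a raw string as in the surrounding examples:

tokens, frequency = processText("the cat sat on the mat")
print(sorted(tokens))      # unique tokens
print(frequency["the"])    # occurrence count for "the"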
Example #37
def process_file(segment, filename):
    with codecs.open(filename, 'r', 'utf-8') as f:
        for sentence in preprocess.process(f):
            yield segment(sentence)