Example #1
    def __init__(self, word_dim, char_dim, max_sent_len, max_char_len,
                 learning_rate, num_train_steps):

        self.word_dim = word_dim
        self.char_dim = char_dim
        self.max_sent_len = max_sent_len
        self.max_char_len = max_char_len
        self.learning_rate = learning_rate
        self.num_train_steps = num_train_steps

        ## Preprocess data
        self.prepro = preprocess.Preprocess(self.char_dim, self.max_sent_len,
                                            self.max_char_len)
        self.train_X, self.train_seq_length, self.train_Y, self.test_X, self.test_seq_length, self.test_Y = self.prepro.load_data(
            "./train.csv", "./test.csv", self.max_sent_len)
        self.word_embedding, self.char_embedding = self.prepro.prepare_embedding(
            self.char_dim)
        self.train_X, self.train_X_char, self.train_X_char_len, self.train_Y = self.prepro.prepare_data(
            self.train_X, self.train_Y, "train")
        self.test_X, self.test_X_char, self.test_X_char_len, self.test_Y = self.prepro.prepare_data(
            self.test_X, self.test_Y, "test")

        ## Placeholders
        self.word_input = tf.placeholder(tf.int32,
                                         shape=[None, max_sent_len],
                                         name='word')
        self.char_input = tf.placeholder(
            tf.int32, shape=[None, max_sent_len, max_char_len], name='char')
        self.label = tf.placeholder(tf.int32, shape=[None], name='label')
        self.seq_len = tf.placeholder(tf.int32, shape=[None])
        self.char_len = tf.placeholder(tf.int32, [None, max_sent_len])
        self.dropout = tf.placeholder(tf.float32, shape=())
Example #2
def store_features2(name):
    pre = preprocess.Preprocess('./BTCUSDT/dol_bar.csv')
    df = pre.x_feature2()
    df.to_csv("./BTCUSDT/features_min.csv")
    df = pre.clean_df()
    df.to_csv("./BTCUSDT/features_min_clean.csv")
    print(name)
Example #3
def run():
    print "Loading data..."
    # load training data
    trainImages,trainLabels=dl.load_mnist_train()

    imDim = trainImages.shape[0]
    inputDim = 50
    outputDim = 10
    layerSizes = [16]*2

    trainImages = trainImages.reshape(imDim**2,-1)

    pcer = pc.Preprocess()
    pcer.computePCA(trainImages)
    whitenedTrain = pcer.whiten(trainImages, inputDim)

    minibatch = whitenedTrain.shape[1]
    print "minibatch size: %d" % (minibatch)
    epochs = 10000
    stepSize = 1e-2

    nn = nnet.NNet(inputDim,outputDim,layerSizes,minibatch)
    nn.initParams()

    SGD = sgd.SGD(nn,alpha=stepSize,minibatch=minibatch)

    for e in range(epochs):
    	print "Running epoch %d"%e
    	SGD.run(whitenedTrain,trainLabels)

    SGD.dumptrace()
Example #4
def write_preprocess_sentence_without_synonymous():
    with open(const.FIRST_PROCESS_REVIEW_PATH, 'r') as fr:
        fw = open(const.REVIEW_FOR_CLUSTER_PATH, 'w+')
        p = pre.Preprocess()
        for line in fr:
            l = line.split(',')
            cla = l[len(l) - 1].strip()
            j = 1
            s = ''
            while j < len(l) - 2:
                if j == len(l) - 3:
                    s += l[j]
                else:
                    s += l[j] + ','
                j += 1
            # print s
            # preprocess
            p.set_sentence(s)
            res = p.preprocess(False)
            # If the text is not English, treat it as empty text and label it as class 4
            if res == '':
                cla = '4'
            fw.write(l[0] + ',' + res + ',' + cla + '\n')

        fw.close()
Example #5
def get_labels_no_side(outfolder, inputfile="x_features.csv"):
    inputfile = outfolder + inputfile
    pre = preprocess.Preprocess(inputfile)
    df = pre.label_fix_no_side(is_infile=True, inputfile=inputfile)
    print(df.head())
    df.to_csv(outfolder + "labels_no_side.csv")
    return df
Example #6
def write_preprocess_sentence():
    with open(const.MARKED_REVIEW_PATH, 'r') as fr:
        fw = open(const.FIRST_PROCESS_REVIEW_PATH, 'w+')
        i = 0
        p = pre.Preprocess()
        for line in fr:
            l = line.split(',')
            cla = l[len(l) - 1].strip()
            j = 5
            s = ''
            while j < len(l) - 2:
                s += l[j] + ','
                j += 1
            # print s
            # preprocess
            p.set_sentence(s)
            res = p.preprocess()
            # If the text is not English, treat it as empty text and label it as class 4
            if res == '':
                cla = '4'
            fw.write(l[0] + ',' + s + res + ',' + cla + '\n')

            i += 1
            if i == 1300:
                break

        fw.close()
Example #7
def main():
    root = utils.get_root_path(False)
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option('--learning_rate_rbm',
                      action='store',
                      type='string',
                      dest='learning_rate_rbm')
    parser.add_option('--epochs_rbm',
                      action='store',
                      type='string',
                      dest='epochs_rbm')
    parser.add_option('--batch_size',
                      action='store',
                      type='string',
                      dest='batch_size')
    parser.add_option('--data_set',
                      action='store',
                      type='string',
                      dest='data_set')

    (opts, args) = parser.parse_args()

    file_data = ReadFile.ReadFile(root + '/NSL_KDD-master',
                                  opts=opts).get_data()
    data_pp = preprocess.Preprocess(file_data).do_predict_preprocess()
    dbn_model.DBN(data_pp).do_dbn('yadlt', opts=opts)
    dbn_model.DBN(data_pp).do_dbn_with_weight_matrix(root + '/save')
    model.do_svm()
Example #8
def get_labels(outfolder,inputfile="x_features.csv"):
    inputfile = outfolder+inputfile
    pre = preprocess.Preprocess(inputfile)
    df = pd.read_csv(inputfile,index_col=0)
    df['date_time'] = pd.to_datetime(df.date_time)
    df.index = df.date_time 
    df.drop(columns=['date_time'],inplace=True)
    # print(df.head())
    # return 
    df = pre.labeling(df)
    print(df.head())
    df.to_csv(outfolder+"labels.csv")
# get_labels("./BNBUSDT/")
# outfolder = ["./BNBUSDT/","./ETHUSDT/"]
# p1 = Process(target=get_labels,args=(outfolder[0],"x_features.csv"))
# p1.start() 
# p2 = Process(target=get_labels,args=(outfolder[1],))
# p2.start() 

# def clean_df(outfolder,inputfile="x_features.csv"):
#     inputfile = outfolder+inputfile
#     df = pd.read_csv(inputfile)
#     df.dropna(axis=0, how='any', inplace=True)
#     df.to_csv(outfolder+"x_features_clean.csv")

# outfolder = ["./BNBUSDT/","./ETHUSDT/"]
# p1 = Process(target=gtd.get_training_dataset,args=(outfolder[0],))
# p1.start() 
# p2 = Process(target=gtd.get_training_dataset,args=(outfolder[1],))
# p2.start() 
Example #9
def change_tfidf(text):
    pre = preprocess.Preprocess()
    with open('tfidf.pickle', 'rb') as handle:
        vectorizer = pickle.load(handle)
    clean = ' '.join(e for e in pre.preprocess(text))
    vector = vectorizer.transform([clean])
    return vector.toarray()
Example #10
    def __init__(self,
                 min_bid=0.0,
                 max_bid=MAX_BID,
                 min_cost=0.0,
                 max_cost=MAX_EXP):
        self.min_bid = min_bid
        self.max_bid = max_bid
        self.min_cost = min_cost
        self.max_cost = max_cost

        kernel_cost = gpflow.kernels.SquaredExponential(
        )  #  * gpflow.kernels.Constant()  # * gpflow.kernels.SquaredExponential()
        kernel_rev = gpflow.kernels.SquaredExponential(
        )  # * gpflow.kernels.Constant()  # * gpflow.kernels.SquaredExponential()

        self.__mean_cost = None  # gpflow.mean_functions.Linear()  # None  # 0.0
        self.__mean_rev = None  # gpflow.mean_functions.Linear()  # None  # 0.0

        self.__input_scaler = preprocess.Preprocess(with_scaler=True,
                                                    with_mean=False,
                                                    with_std=False)
        self.__output_cost_scaler = preprocess.Preprocess(scale_min=0.0,
                                                          scale_max=50.0,
                                                          with_scaler=True,
                                                          with_mean=True,
                                                          with_std=False)
        self.__output_rev_scaler = preprocess.Preprocess(scale_min=0.0,
                                                         scale_max=50.0,
                                                         with_scaler=True,
                                                         with_mean=True,
                                                         with_std=False)

        # data already knew
        self.X = np.array(0.0).reshape(
            -1, 1)  #  self.__input_scaler.fit(np.array(0.0).reshape(-1, 1))
        self.Y_cost = np.array(0.0).reshape(
            -1, 1)  # self.__output_scaler.fit(np.array(0.0).reshape(-1, 1))
        self.Y_rev = np.array(0.0).reshape(
            -1, 1)  # self.__output_scaler.fit(np.array(0.0).reshape(-1, 1))
        self.transformed_X = self.__input_scaler.fit(
            np.array(0.0).reshape(-1, 1))
        self.transformed_Y_cost = self.__output_cost_scaler.fit(
            np.array(0.0).reshape(-1, ))
        self.transformed_Y_rev = self.__output_rev_scaler.fit(
            np.array(0.0).reshape(-1, ))

        self._optimize()
Example #11
def add_features(outfolder, inputfile="dol_bar.csv"):
    inputfile = outfolder + inputfile
    pre = preprocess.Preprocess(inputfile)
    print('compute x_features.csv')
    x_features = pre.x_feature2()
    x_features.to_csv(outfolder + "x_features.csv")
    df = pre.clean_df()
    df.to_csv(outfolder + "x_features_clean.csv")
Example #12
 def manual(self, args):
     """Manual execute."""
     # read the manual configuration file
     pre_process = preprocess.Preprocess()
     pre_process.readconfig('manual.ini')
     self.endoutput = pre_process.endoutput
     self.midoutput = pre_process.midoutput
     self.respath = pre_process.respath
     self.gsystem = pre_process.gsystem
     self.ctype = pre_process.ctype
     self.duration = pre_process.duration
     self.prn = pre_process.prn
     # choose module
     if args[1].upper() == '-A':
         # start process
         process = list()
         process.append(multiprocessing.Process(target=self.enu))
         process.append(multiprocessing.Process(target=self.uh))
         process.append(multiprocessing.Process(target=self.satnum))
         process.append(multiprocessing.Process(target=self.satiode))
         process.append(multiprocessing.Process(target=self.satorbitc))
         [p.start() for p in process]
         [p.join() for p in process]
         print('All Done!')
     elif args[1].upper() == '-R':
         self.report()
         print('Done!')
     elif args[1].upper() == '--ENU':
         self.enu()
         print('Done!')
     elif args[1].upper() == '--HV':
         self.uh()
         print('Done!')
     elif args[1].upper() == '--HVM':
         self.uhmean()
         print('Done!')
     elif args[1].upper() == '--SAT':
         self.satnum()
         print('Done!')
     elif args[1].upper() == '--IODE':
         self.satiode()
         print('Done!')
     elif args[1].upper() == '--ORBITC':
         self.satorbitc()
         print('Done!')
     elif args[1].upper() == '--HELP' or args[1]:
         print('Arg:')
         print('\t-a -A:execute all modules.')
         print('\t-r -R:zdpos report')
         print('\t--ENU:plot ENU')
         print('\t--HV:plot horizontal and vertical errors')
         print('\t--HVM:plot mean of horizontal and vertical errors')
         print('\t--SAT:plot satellite number')
         print('\t--IODE:plot satellite iode')
         print('\t--ORBITC:plot orbit and clock errors')
Example #13
    def __init__(self , data_dir = 'data'):
        self.data_dir = data_dir
        #self.train_dir_name =  os.path.join(self.data_dir ,'/EASC-UTF-8/Articles/')
        #self.test_dir_name  =  os.path.join(self.data_dir ,'/EASC-UTF-8/Articles/')
        #self.train_dir_name =  'data/EASC-UTF-8/Articles/'
        #self.test_dir_name  =  'data/EASC-UTF-8/Articles/'

        self.train_dir_name =  'data\EASC-UTF-8\Articles'
        self.test_dir_name =   'data\EASC-UTF-8\MTurk'
        self.data =pd.DataFrame(columns = ['Orignal' ,'Summary1' ,'Summary2' ,'Summary3' ,'Summary4' ,'Summary5'])
        self.pr = preprocess.Preprocess()
Example #14
def database_compare(images, query_img, i_size, n_angles):
    pre = preprocess.Preprocess(i_size, n_angles)
    pre.process_img(query_img)
    hitmap = pre.get_hitmap()
    edgel_count = pre.get_edgel_counts()[0]
    hit_counts = np.asarray([0.] * len(images))
    for i in range(0, len(images)):
        edgels = pre.get_edgels(images[i])
        for edgel in edgels:
            for _ in hitmap[edgel[0]][edgel[1]][edgel[2]]:
                hit_counts[i] += 1
    return hit_counts / edgel_count
Example #15
def main():
    #filter warnings
    warnings.warn = warn
    x = pr.Preprocess()
    #preprocessing
    X_train, Y_train, X_dev, Y_dev, X_test, Y_test = x.get_data()

    # Using sys.argv
    if (sys.argv[1] == 'train_b'):
        train_baseline(X_train, Y_train, X_dev, Y_dev, X_test, Y_test)
    elif (sys.argv[1] == 'train_e'):
        train_extended(X_train, Y_train, X_dev, Y_dev, X_test, Y_test)
    elif (sys.argv[1] == 'hyper_tune'):
        feature_tuning(X_train, Y_train, X_dev, Y_dev, X_test, Y_test)
Example #16
    def __init__(self,batch_size,word_dim,hidden_dim,num_layers,max_vocab_size,max_word_len,learning_rate,training_epochs,path, isTrain=True):
        
        self.batch_size = batch_size
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.max_vocab_size = max_vocab_size
        self.max_word_len = max_word_len
        self.learning_rate = learning_rate
        self.training_epochs = training_epochs
        self.path = path

        ## Preprocess data
        self.prepro = preprocess.Preprocess(word_dim=word_dim, max_vocab_size=max_vocab_size, path=path)
        self.word_embedding, self.clear_padding, self.word2idx, self.idx2word = self.prepro.build_embedding()
                
        ## Placeholders
        self.word_idx = tf.placeholder(tf.int32, shape = [None, max_word_len], name = 'word_idx')
        self.label = tf.placeholder(tf.int32, shape = [None], name = 'label')
        self.seq_len = tf.placeholder(tf.int32, shape = [None], name = 'seq_len')
        self.dropout = tf.placeholder(tf.float32, shape = (), name = 'dropout')
        
        ## Read file
        self.train_text, self.train_len, self.train_score = self.prepro.read_data(self.path + '/ratings_train.txt') 
        self.test_text, self.test_len, self.test_score = self.prepro.read_data(self.path + '/ratings_test.txt') 
        self.train_size, self.test_size = len(self.train_score), len(self.test_score)
        num_train_steps = int(self.train_size / self.batch_size) + 1
        
        train_dataset = tf.data.Dataset.from_tensor_slices((self.word_idx, self.label, self.seq_len))
        train_dataset = train_dataset.shuffle(self.train_size)
        train_dataset = train_dataset.batch(self.batch_size)
        train_dataset = train_dataset.repeat()
        
        test_dataset = tf.data.Dataset.from_tensor_slices((self.word_idx, self.label, self.seq_len))
        test_dataset = test_dataset.batch(self.batch_size)
        test_dataset = test_dataset.repeat()
        
        iters = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
        self.iter_word_idx, self.iter_label, self.iter_seq_len = iters.get_next()

        ## Create the initialisation operations
        self.train_init_op = iters.make_initializer(train_dataset)
        self.test_init_op = iters.make_initializer(test_dataset)
        
        ## Build graph
        self.build_model(isTrain)
        self.build_optimizer(num_train_steps)
        self.get_accuracy()
Example #17
def main():
    global files
    files = list(glob.glob('*.txt'))
    v = list()

    for filename in files:
        with open(filename) as f:
            text = f.read()

            p = preprocess.Preprocess(text)
            v.append(p.get_list())

    # Creating Index
    idx = indexer.Index(v)

    display_menu(idx)
Example #18
def compute_low_dimensions_data_matrix(weight, datas):
    _datas = datas
    weight_matrix = np.matmul(
        np.matmul(
            np.matmul(
                np.matmul(weight[0], weight[1]
                          ), weight[2]
            ), weight[3]
        ), weight[4]
    )
    output_train_data = np.matmul(_datas[0][0], weight_matrix)
    output_test_data = np.matmul(_datas[1][0], weight_matrix)

    datas = preprocess.Preprocess(
        ((output_train_data, _datas[0][1]), (output_test_data, _datas[1][1])), type='svm').do_svm_preprocess()

    save_dbn_weight_as_svm(datas[0], 'train')
    save_dbn_weight_as_svm(datas[1], 'test')
Example #19
def run():
    input_dir = "input"
    output_dir = "output"
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--input", required=True,help="path to input text document")
    #ap.add_argument("-o", "--output", required=True,help="path to output Summarized Document")
    args = vars(ap.parse_args())
    
    
    input_path = os.path.join(input_dir,args['input'])
    output_path = os.path.join(output_dir,args['input'])
    
    pr = preprocess.Preprocess()    
    input_text = pr.get_article_content(input_path)
    summary = get_summary(input_text)
    
    #pdb.set_trace()
    with open(output_path,'w' ,encoding = "utf-8") as f: f.write(summary)
Example #20
def get_summary(input_text):
    pr = preprocess.Preprocess()
    original_text = input_text
    preprocessed_text = pr.get_clean_article(input_text)
    sentences = pr.get_article_sentences(preprocessed_text)
    original_sentences = pr.get_article_sentences(input_text)
    paragraphs = pr.get_cleaned_article_paragraphes(preprocessed_text)
    para_sent_list = pr.get_para_sentences(paragraphs)
    tokenized_word_sentences = pr.get_tokenized_word_sentences(sentences)
    
    doc = document.Doc(
    original_text = original_text ,  original_sentences = original_sentences ,
    preprocessed_text = preprocessed_text.replace('ppp',""),
    sentences = sentences,
    paragraphs = paragraphs ,para_sent_list = para_sent_list ,tokenized_word_sentences = tokenized_word_sentences)
    
    summary = doc.summarize()
    return summary
Example #21
    def get_sample_users(self, valid_users=None):
        if self._cache.exist_sample_users():
            if self._run is not None and self._run.verbose:
                print('Sample users cache found')
            # Check if sample users are in cache
            # If sample users exist in cache, read and return them
            sample_users = self._cache.read_sample_users()
            return sample_users
        else:
            if self._run is not None and self._run.verbose:
                print('Sample users cache not found. Generating.')
            # If not in cache generate sample users and save
            if valid_users is None:
                valid_users = self.get_valid_users()
            sample_users = random.sample(valid_users, self._config.sample_size)

            if self._run is not None and self._run.verbose:
                print('Sample users generated. Checking for outliers.')

            # Check for outliers in user_samples
            self._db.open()
            time_start, time_end = self._db.get_time_min_max()
            prep = preprocess.Preprocess()

            # Check if there are outliers in the selected sample
            links = self._db.get_links(time_start, time_end, sample_users)
            outliers = prep.outlier_nodes(links, sample_users,
                                          self._config.density_neighbors,
                                          self._config.density_cutoff, True)
            if self._run is not None and self._run.verbose:
                print(str(len(outliers)) + ' outliers found')

            # Remove the outliers from the users_sample
            for n in outliers:
                sample_users.remove(n)

            if self._run is not None and self._run.verbose:
                print('Outliers removed. Saving sample users in cache.')

            self._cache.save_sample_users(sample_users)
            self._db.close()

            return sample_users
Example #22
    def __init__(self, directory, load_file=None):
        self.img_size = 100
        self.n_angles = 6
        direc = directory

        self.pre = preprocess.Preprocess(self.img_size, self.n_angles)
        self.lookup = []

        for subdir, dirs, files in os.walk(direc):
            for f in files:
                filename = os.path.join(subdir, f)
                if filename[-3:] == 'jpg':
                    if not load_file:
                        raw_img = cv2.imread(filename, 0)
                        self.pre.process_img(raw_img)
                    self.lookup.append(filename)
                    print '%i:\t%s' % (len(self.lookup), filename)
        if load_file:
            edgel_counts = database.load_data(self.pre.hits, load_file)
            self.pre.set_edgel_counts(edgel_counts)
Example #23
    def __init__(self, fname1, fname2):
        self.img_size = 100
        self.n_angles = 4
        self.pre = preprocess.Preprocess(self.img_size, self.n_angles)
        self.test_img = np.zeros((self.n_angles, self.img_size, self.img_size))

        query_img = cv2.imread(fname1, 0)
        database_img = cv2.imread(fname2, 0)

        self.query_set = self.pre.get_edgels(query_img)
        self.pre.process_img(database_img)
        self.hitmap = self.pre.get_hitmap()

        for i, r in enumerate(self.hitmap):
            for j, c in enumerate(r):
                for theta, entry in enumerate(c):
                    if len(entry) > 0:
                        self.test_img[theta, i, j] = 128

        for i, j, theta in self.query_set:
            self.test_img[theta, i, j] = 255
Example #24
 def autorun(self):
     """Auto run."""
     # read the autorun configuration file
     pre_process = preprocess.Preprocess()
     pre_process.readconfig('autorun.ini')
     self.endoutput = pre_process.endoutput
     self.midoutput = pre_process.midoutput
     self.respath = pre_process.respath
     self.gsystem = pre_process.gsystem
     self.ctype = pre_process.ctype
     yesterday = datetime.datetime.now().date() + datetime.timedelta(-1)
     self.duration = [yesterday, yesterday]
     # start process
     process = list()
     process.append(multiprocessing.Process(target=self.enu))
     process.append(multiprocessing.Process(target=self.uh))
     process.append(multiprocessing.Process(target=self.satnum))
     [p.start() for p in process]
     [p.join() for p in process]
     # check evaluation quality
     check.check()
     now = datetime.datetime.now().replace(second=0, microsecond=0)
     print('%s: The process of %s Done!' % (str(now), str(yesterday)))
Example #25
 def startPreprocess(self):
     self.prepro = preprocess.Preprocess()
     self.prepro.run()
Example #26

@app.route('/', methods=['GET', 'POST'])
def index():
    if request.method == 'POST':
        sentence = request.form["sentence"]
        result = infer_example(sentence, infer_graph, sess)
        return render_template("index.html",
                               result=result,
                               input_sent=sentence)
    return render_template("index.html")


if __name__ == '__main__':
    prepro = preprocess.Preprocess(word_dim=word_dim,
                                   max_vocab_size=max_vocab_size,
                                   path=corpuspath)
    word_embedding, clear_padding, word2idx, idx2word = prepro.build_embedding(
    )
    infer_graph = Model(word_embedding, max_word_len)
    ## Create model graph
    infer_fn = infer_graph.build_model(hidden_dim, num_layers, None, False)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    saver.restore(sess, modelpath + modelName)

    app.run(host='0.0.0.0', port=5000, debug=True)
Example #27
 def __init__(self):
     self.preprocess = preprocess.Preprocess()
     self.batcher = batch.Batcher(self.preprocess)
     self.batchGen = self.batcher.batchGen
     self.embeddingMatrix = self.preprocess.embeddingMatrix
Example #28
def run(args):
    print '> compute LCS'
    files, labels = pc.getFiles(args.inputfolder,
                                args.suffix,
                                args.labelfile,
                                exact=args.exact)
    if len(args.max_descriptors) == 0:
        descriptors, index_list = pc.loadDescriptors(files,
                                                     rand=True,
                                                     return_index_list=1)
    else:
        descriptors, index_list = pc.loadDescriptors(files,\
                                         max_descs=args.lcs_max_descriptors,
                                         max_descs_per_file=max(int(args.lcs_max_descriptors/len(files)),\
                                                                1),
                                         rand=True,
                                        return_index_list=1)
        print 'descriptors.shape', descriptors.shape
#        #if not args.inputfolders:
#        cur_data, index_list = pc.loadDescriptors(files,
#                                                  max_descs=args.max_descriptors[0]\
#                                                  if args.max_descriptors\
#                                                  else 0,
#                                                  return_index_list=True)

# per descriptor labels:
    if len(index_list) - 1 != len(labels):
        raise ValueError('{} != {} + 1'.format(len(index_list), len(labels)))
    le = preprocessing.LabelEncoder()
    labels = le.fit_transform(labels)
    desc_labels = np.zeros(len(descriptors), dtype=np.uint32)
    for r in xrange(len(labels)):
        desc_labels[index_list[r]:index_list[r + 1]] = labels[r]

    prep = preprocess.Preprocess(args)

    ubm = ubm_adaption.loadGMM(args.load_ubm)
    if not args.no_assignment:
        assignments = encoding.getAssignment(ubm.means_, descriptors)
    lcs = []
    descr = []
    # Note: we could also compute the LCS afterwards using 'multipca' option
    # of preprocess...
    for i in range(len(ubm.means_)):
        if args.no_assignment:
            diff = descriptors - ubm.means_[i]
        else:
            for_lcs = descriptors[assignments[:, i] > 0]
            diff = for_lcs - ubm.means_[i]
        if args.resnorm:
            diff = preprocessing.normalize(diff, norm='l2', copy=False)
        if not args.global_cs:
            prep.fit(diff, desc_labels[assignments[:, i] > 0])
            lcs.append(copy.deepcopy(prep.pca))
            prep.pca = None
        else:
            descr.append(diff)

    if args.global_cs:
        print '> compute global lcs'
        diff = np.concatenate(descr, axis=1)
        print '... from descr.shape', diff.shape
        prep.fit(diff, desc_labels)
        print '< compute global lcs'
        lcs = copy.deepcopy(prep.pca)
        prep.pca = None
    folder = os.path.join(args.outputfolder, 'lcs.pkl.gz')
    pc.dump(folder, lcs)
    return folder
Example #29
import preprocess
import plotting
import matplotlib.pyplot as plt
import pickle
import os
import sys
import database

model_pat = os.path.dirname(os.path.realpath(__file__)) + "/model.sav"
model = pickle.load(open(model_pat, "rb"))

env = preprocess.Preprocess("test_image/car4.jpg")
env.plate_detection()
segmented_characters = env.character_segmentation()
plotting.show()
segmented_characters.sort()

ans = []
for char in segmented_characters:
    #print(plt.imshow(char[1]))
    ans.append(model.predict(char[1].reshape(1, -1)))

license_plate = []
for val in ans:
    license_plate.append(val[0])

for idx in range(len(license_plate)):
    if (idx == 0 or idx == 1 or idx == 4 or idx == 5):
        if (license_plate[idx] == '0'):
            license_plate[idx] = str('O')
        elif (license_plate[idx] == '1'):
Example #30
import os
import preprocess as pp
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
mpiSize = comm.Get_size()
name = MPI.Get_processor_name()
pre_process = pp.Preprocess('data.txt', mpiSize)
data_info = pre_process.data
terminate_flag = None

#print pre_process.data.is_build_file

if pre_process.data.is_build_file == True:
    if rank == 0:
        if not pre_process.make_file():
            print "The target file does not exists."
            terminate_flag = True
terminate_flag = comm.bcast(terminate_flag, root=0)
if terminate_flag == True:
    print "the end: ", rank
    exit()

task_set = pre_process.get_task_set(rank)
if len(task_set) == 0:
    exit()
try:
    if not pre_process.set_parameter(task_set, rank):
        exit()
    pre_process.do_montage(task_set, rank)