Code Example #1
import numpy as np
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity


def text_rank(data):
    """Applies text_rank algorithm on text passed as a parameter and returns a summary"""

    sentences = tokenize(data)
    l = len(sentences)
    clean_sentences = preprocess(sentences)
    sentence_vectors = vectorize(clean_sentences)
    # Compute the pairwise cosine-similarity matrix (sentence vectors are 50-dimensional)
    s_mat = np.zeros([l, l])

    for i in range(l):
        for j in range(l):
            if i != j:
                s_mat[i][j] = cosine_similarity(
                    sentence_vectors[i].reshape(1, 50),
                    sentence_vectors[j].reshape(1, 50))[0, 0]

    # Applying PageRank Algorithm - To Calculate Sentence Scores
    graph = nx.from_numpy_array(s_mat)
    sentence_scores = nx.pagerank(graph)

    ranked_sentences = sorted(
        ((sentence_scores[i], s) for i, s in enumerate(sentences)),
        reverse=True)

    summary = []
    # Keep the top ~20% highest-ranked sentences (at least one)
    top_n = max(1, round(l * 0.2))
    for i in range(top_n):
        summary.append(ranked_sentences[i][1])
    return clean_summary(summary)
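The helpers tokenize, preprocess, vectorize, and clean_summary are project-specific and not shown. Judging by the reshape(1, 50) calls, vectorize produces 50-dimensional sentence vectors, most likely averaged 50-d GloVe word embeddings. A minimal sketch of what such helpers could look like (the cleaning rules and the embedding source are assumptions):

import re
import numpy as np
from nltk.tokenize import sent_tokenize  # requires nltk's 'punkt' data

def tokenize(data):
    # Split the raw text into sentences
    return sent_tokenize(data)

def preprocess(sentences):
    # Lowercase and keep letters/spaces only; real pipelines also drop stopwords
    return [re.sub(r'[^a-z ]', '', s.lower()) for s in sentences]

def vectorize(clean_sentences, embeddings=None, dim=50):
    # Average per-word embeddings (e.g. 50-d GloVe) into one vector per sentence;
    # out-of-vocabulary words contribute zero vectors
    embeddings = embeddings or {}
    vectors = []
    for sentence in clean_sentences:
        words = sentence.split()
        vec = np.zeros(dim)
        for w in words:
            vec += embeddings.get(w, np.zeros(dim))
        vectors.append(vec / len(words) if words else vec)
    return vectors

def clean_summary(summary):
    # Join the selected sentences into a single string
    return ' '.join(summary)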
Code Example #2
    def video_stream(self):

        ret, frame = self.cap.read()
        if not ret:  # no camera frame available; try again on the next tick
            self.video_label.after(1, self.video_stream)
            return
        frame = cv2.flip(frame, 1)  # mirror for a more natural selfie view

        hand_img = preprocess(frame)
        probs_class_map, self.prediction = self.predict(hand_img)

        # A sustained run of 'blank' predictions commits the current word to the sentence
        if len(self.current_word) != 0 and self.prediction == 'blank':
            self.blank_count += 1
            if self.blank_count > 80:
                self.sentence += ' '
                self.sentence += self.current_word
                self.current_word = ''
                for i in classes:
                    self.count[i] = 0
                self.blank_count = 0

        # Otherwise require a stable prediction over many frames before accepting a character
        elif self.prediction != 'blank':
            self.count[self.prediction] += 1
            if self.count[self.prediction] > 50:
                self.current_word += self.prediction
                for i in classes:
                    self.count[i] = 0
                self.blank_count = 0
                self.plot(probs_class_map)

        self.updateDepositLabel(self.prediction, self.current_word,
                                self.sentence)
        cv2Img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
        img = Image.fromarray(cv2Img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.video_label.imgtk = imgtk
        self.video_label.configure(image=imgtk)
        self.video_label.after(1, self.video_stream)
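The final after(1, self.video_stream) call is the standard Tkinter polling idiom: instead of blocking in a while loop, the handler reschedules itself through the event loop. A stripped-down, self-contained sketch of the same pattern:

import tkinter as tk

root = tk.Tk()
label = tk.Label(root, text='0')
label.pack()

counter = {'n': 0}

def tick():
    # Update the widget, then re-register ourselves with the event loop,
    # exactly like video_stream does with its camera frames
    counter['n'] += 1
    label.configure(text=str(counter['n']))
    label.after(1000, tick)

tick()
root.mainloop()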
Code Example #3
    def step(self, action):
        next_state = [[] for empty in range(self.group_size)]
        reward_sum = np.zeros(self.batch_size)

        for i in range(self.group_size):
            outcomes = [env.step(act) for env, act in zip(self.env, action)]

            cols = [[], [], [], []]  # next_state, reward, done, info

            for j in range(self.batch_size):
                one_step = outcomes[j]
                for col, value in zip(cols, one_step):
                    col.append(value)
            cols = [np.array(col) for col in cols]

            cols[0] = np.array([preprocess(cols[0][k]) for k in range(self.batch_size)])
            next_state[i].append(cols[0])
            reward_sum += cols[1]
        # Now next_state has shape (group_size, 1, batch_size, 88, 80, 1)
        # So reshape to (group_size, batch_size, 88, 80, 1)
        # Split them, stack them to get (batch_size, 88, 80, group_size)
        next_state = np.reshape(next_state, [self.group_size, self.batch_size, 88, 80, 1])
        split_states = [next_state[k] for k in range(self.group_size)]
        next_state = np.array(np.concatenate(split_states, axis=-1))

        return next_state, reward_sum, cols[2], cols[3]
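The reshape/concatenate sequence at the end is easiest to verify in isolation. A small sketch with dummy data (the group_size and batch_size values are arbitrary) reproduces the shape transitions described in the comments:

import numpy as np

group_size, batch_size = 4, 2
# After the loop: one singleton-wrapped frame batch per group step
next_state = np.random.rand(group_size, 1, batch_size, 88, 80, 1)

# Drop the singleton axis
next_state = np.reshape(next_state, [group_size, batch_size, 88, 80, 1])

# Split along the group axis and rejoin along the channel axis
split_states = [next_state[k] for k in range(group_size)]
stacked = np.concatenate(split_states, axis=-1)
print(stacked.shape)  # (2, 88, 80, 4), i.e. (batch_size, 88, 80, group_size)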
Code Example #4
def main(neval=30,
         nfolds=5,
         ncvjobs=1,
         nreps=5,
         kbest=None,
         ngram_hi=3,
         jobs=1,
         seed=1,
         *event_sel):
    print(locals())
    df = read_train()
    X, y = preprocess(df, event_sel=event_sel, ngrams=(2, ngram_hi))

    best = evaluate_hyper(
        X,
        y,
        hyper_objective,
        neval=neval,
        nfolds=nfolds,
        ncvjobs=ncvjobs,
        nreps=nreps,
        nbest=kbest,
        njobs=jobs,
        seed=seed,
    )

    print('Final best: {}'.format(best))

    return
Code Example #5
def FinalIndexDoc2Vec(final_query, desc):
    desc = [preprocess(i) for i in desc if i != '' and len(i.split()) > 10]
    sentences = []
    for item_no, line in enumerate(desc):
        sentences.append(LabeledSentence(line, [item_no]))
    dm = 1
    size = 300
    context_window = 50
    seed = 42
    min_count = 1
    alpha = 0.5
    max_iter = 200
    model = gensim.models.doc2vec.Doc2Vec(documents=sentences,
                                          dm=dm,
                                          alpha=alpha,
                                          seed=seed,
                                          min_count=min_count,
                                          max_vocab_size=None,
                                          window=context_window,
                                          size=size,
                                          sample=1e-4,
                                          negative=5,
                                          iter=max_iter)
    tokens = final_query.split()
    new_vector = model.infer_vector(tokens)
    sims = model.docvecs.most_similar([new_vector], topn=10)
    refined = [i[0] for i in sims if i[1] > 0]
    return refined
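Two caveats worth noting: this trains a fresh Doc2Vec model on every query, which is expensive, and it uses gensim's pre-4.0 API (LabeledSentence, size=, iter=; gensim 4.x renamed these to TaggedDocument, vector_size= and epochs=). The usual pattern is to train once, persist, and reuse the model for later queries, roughly:

# Sketch: train once, save, and reload for subsequent queries
model.save('doc2vec.model')

reloaded = gensim.models.doc2vec.Doc2Vec.load('doc2vec.model')
vector = reloaded.infer_vector('ranked retrieval'.split())
sims = reloaded.docvecs.most_similar([vector], topn=10)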
Code Example #6
    def on_status(self, status):
        tweet = str(status.text).lower()
        if any(i in tweet for i in targetWords):
            preprocessedtweet = preprocess(tweet)
            predictedlabel = predictInterest([preprocessedtweet], NBClassifier,
                                             bestCount_Vectorizer)
            print(tweet)
            print(label[predictedlabel[0]])
Code Example #7
def predict(image):

    pp_image = preprocess(image)
    pp_image = pp_image.reshape(-1, 45, 45, 1)

    prediction = model.predict(pp_image)
    cls = classes[str(np.argmax(prediction))]

    return cls
Code Example #8
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.linear_model import Perceptron
    clf = Perceptron(max_iter=50, tol=1e-3, random_state=1)

    return benchmark(clf, X_train, y_train)
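This and the following examples (#9 through #12, and several more below) share read_train, preprocess, and benchmark helpers that the snippets do not include. The benchmark function presumably fits the classifier and reports a cross-validated score; a hypothetical stand-in, with the metric and fold count assumed:

from sklearn.model_selection import cross_val_score

def benchmark(clf, X_train, y_train, cv=5):
    # Hypothetical stand-in: k-fold cross-validated accuracy for the classifier
    scores = cross_val_score(clf, X_train, y_train, cv=cv)
    print('{}: {:.4f} +/- {:.4f}'.format(
        type(clf).__name__, scores.mean(), scores.std()))
    return scores.mean()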
Code Example #9
File: KNN.py Project: WojciechMigda/TCO-CDC-OIICS
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.neighbors import KNeighborsClassifier
    clf = KNeighborsClassifier(n_neighbors=10)

    return benchmark(clf, X_train, y_train)
Code Example #10
File: Ridge.py Project: WojciechMigda/TCO-CDC-OIICS
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.linear_model import RidgeClassifier
    clf = RidgeClassifier(tol=1e-2, solver="sag", random_state=1)

    return benchmark(clf, X_train, y_train)
Code Example #11
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.neighbors import NearestCentroid
    clf = NearestCentroid()

    return benchmark(clf, X_train, y_train)
Code Example #12
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.naive_bayes import BernoulliNB, MultinomialNB
    clf = BernoulliNB(alpha=.01)

    return benchmark(clf, X_train, y_train)
Code Example #13
    def reset_all(self):
        """ Returns a stack of 4 copies of the original reset state for
        each runner: return shape is (batch_size, 88, 80, group_size)"""
        reset_env = [env.reset() for env in self.env]
        # (64, 210, 163, 3)
        reset_env = np.array([preprocess(reset_env[i]) for i in range(self.batch_size)])
        # (64, 88, 80, 1)
        reset_env_stack = [reset_env for k in range(self.group_size)]
        # (4, 64, 88, 80, 1)
        reset_env = np.concatenate(reset_env_stack, axis=-1)
        # (64, 88, 80, 4)
        return reset_env
Code Example #14
File: Search.py Project: Esokids/IR_Ranking
def preprocess_search(keyword):
    corpus = df.columns
    keyword = preprocess(keyword)
    keyword = make_bigrams(keyword)
    search_words = list()

    for word in keyword:
        if '*' in word:
            search_words.extend(fnmatch.filter(corpus, word))
        else:
            search_words.extend(difflib.get_close_matches(word, corpus))
    return search_words
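preprocess_search combines two matching strategies: terms containing a * wildcard go through shell-style pattern matching, everything else through fuzzy matching. A quick demonstration against a toy corpus:

import difflib
import fnmatch

corpus = ['ranking', 'rankings', 'retrieval', 'relevance']

# Wildcard terms: shell-style pattern matching
print(fnmatch.filter(corpus, 'rank*'))                 # ['ranking', 'rankings']

# Plain terms: fuzzy matching with a default similarity cutoff of 0.6
print(difflib.get_close_matches('retreival', corpus))  # ['retrieval']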
Code Example #15
def FinalIndexJaccard(final_query, desc):
    desc = [preprocess(i) for i in desc if i != '' and len(i.split()) > 10]
    list_indx = []
    for indx, i in enumerate(desc):
        dict_indx = {}
        dict_indx['index'] = indx
        dict_indx['similarity'] = 1 - distance.jaccard(final_query, i)
        if dict_indx['similarity'] > .5:
            list_indx.append(dict_indx)
    # Return the original document indices of the ten most similar descriptions
    refined = [
        d['index'] for d in sorted(list_indx,
                                   key=lambda d: d['similarity'],
                                   reverse=True)[:10]
    ]
    return refined
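distance.jaccard here is presumably the PyPI distance package, which treats its arguments as plain sequences; handing it raw strings therefore compares character sets, not word sets. If token-level similarity is intended, a set-based variant is straightforward:

def jaccard_similarity(a, b):
    # Token-level Jaccard similarity: intersection over union of token sets
    set_a, set_b = set(a.split()), set(b.split())
    if not set_a or not set_b:
        return 0.0
    return len(set_a & set_b) / len(set_a | set_b)

print(jaccard_similarity('ranked retrieval model',
                         'ranked boolean retrieval'))  # 0.5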
Code Example #16
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=event_sel)
    #    X_train, y_train = preprocess(df, event_sel=[31, 78])
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42, 55, 11])
    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])

    from lightgbm import LGBMClassifier
    clf = LGBMClassifier(verbose=1, random_state=1, silent=0, n_estimators=400)

    return benchmark(clf, X_train.astype(float), y_train)
Code Example #17
def predict(input_path, output_path, resources_path):

    # The prediction starts by loading the vocabularies used during the training phase
    with open('../resources/vocab.pkl', 'rb') as f:
        vocab = pickle.load(f)
    with open('../resources/vocabu.pkl', 'rb') as f:
        vocabu = pickle.load(f)

    # Run the whole test dataset through the preprocessing phase and save the gold
    # data to a file. This matters when the input file contains whitespace, in which
    # case a gold dataset can be recovered; otherwise it just produces a throwaway
    # file that can be ignored.

    TInput, TLabel, TFullline, TFulllabelline, lens, senlens = preprocess(
        input_path)
    X_testu, X_testb, Y_test, vocabt, vocabut = buildvector(
        TInput, TLabel, TFullline, TFulllabelline, vocab, vocabu, lens)
    savegoldtofile(TLabel)

    #initialize the model and upload the weights and configuration from the file
    model = create_keras_model((len(vocab) + 1), (len(vocabu) + 1), 256)
    model = load_model(resources_path)

    # Proceed with the prediction: feed the X vectors to the predict function and get
    # back one-hot encoded vectors. Reverse the encoding with argmax, retrieve the
    # numerical values, and assign the corresponding labels. Finally, strip the
    # padding added during preprocessing and save the file.

    prediction = model.predict([X_testu, X_testb])
    labels = {0: 'B', 1: 'E', 2: 'I'}  # any other index maps to 'S'
    sen = []
    with open(output_path, "w+") as text_file:
        for count, row in enumerate(prediction):
            line = [labels.get(np.argmax(element), 'S') for element in row]
            # Trim the padding back to the original sentence length
            linez = ''.join(line)[:senlens[count]]
            sen.append(linez)
            text_file.write(linez + '\n')
Code Example #18
def QueryProcess(query):
    data = Data(path)
    cleaned_query = preprocess(query)
    txtn = nlp(cleaned_query)
    txtp = nlp(query)
    noun_phrases = [chunk.text for chunk in txtn.noun_chunks]
    ner = [ent.text for ent in txtp.ents]
    tokens = cleaned_query.split()
    keywords = [
        token.text for token in txtn
        if token.pos_ in ('VERB', 'ADJ', 'NOUN', 'PROPN')
    ]
    synonyms = list(
        set(
            lemma.name() for word in keywords
            for syn in wordnet.synsets(word) for lemma in syn.lemmas()
        ))
    synonyms = [i for i in synonyms if '_' not in i]
    new_text = tokens + synonyms + noun_phrases + [i.lower() for i in ner] + [query]
    final_query = ' '.join(new_text)
    cleaned_data = [preprocess(i) + i for i in data]
    doc = [final_query] + cleaned_data
    indx = indexLSA(doc)  # or indexTFIDF(doc)
    return final_query, indx, data
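The query-expansion step relies on WordNet lemmas for synonyms. Run in isolation, the expansion looks like this (output depends on the installed WordNet data):

from nltk.corpus import wordnet  # requires nltk's 'wordnet' data

keyword = 'rank'
synonyms = {lemma.name() for syn in wordnet.synsets(keyword)
            for lemma in syn.lemmas()}
# Drop multi-word lemmas, mirroring the '_' filter above
synonyms = [s for s in synonyms if '_' not in s]
print(synonyms)  # e.g. ['rank', 'grade', 'order', 'rate', ...]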
Code Example #19
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.linear_model import SGDClassifier
    clf = SGDClassifier(alpha=.0001,
                        max_iter=50,
                        tol=1e-3,
                        penalty='l2',
                        random_state=1)

    return benchmark(clf, X_train, y_train)
Code Example #20
def main(event_sel=None):
    df = read_train()

    X_train, y_train = preprocess(df, event_sel=event_sel)
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42, 55, 11])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])

    from sklearn.svm import LinearSVC
    clf = LinearSVC(loss='squared_hinge',
                    penalty='l2',
                    dual=False,
                    tol=1e-3,
                    verbose=0,
                    random_state=1)

    return benchmark(clf, X_train, y_train)
Code Example #21
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=event_sel)
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42, 55, 11])
    X_train, y_train = preprocess(df, event_sel=[62, 63, 60], ngrams=(2, 4))

    print('Extracting best features by a chi-squared test')
    from sklearn.feature_selection import SelectKBest, chi2
    ch2 = SelectKBest(chi2, k=12000)
    X_train = ch2.fit_transform(X_train, y_train)
    print('Extracting done, {}'.format(X_train.shape))

    from sklearn.svm import LinearSVC
    clf = LinearSVC(loss='squared_hinge',
                    penalty='l2',
                    dual=False,
                    tol=1e-3,
                    verbose=0,
                    random_state=1)

    return benchmark(clf, X_train, y_train)
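One caveat: fitting SelectKBest on all of X_train before cross-validating inside benchmark lets label information leak into the feature selection. A leak-free variant (a sketch, assuming the same benchmark driver) wraps selection and classifier in a pipeline so the selector is refit within each fold:

from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC

# The selector becomes part of the estimator, so CV refits it per fold
pipe = make_pipeline(
    SelectKBest(chi2, k=12000),
    LinearSVC(loss='squared_hinge', penalty='l2', dual=False,
              tol=1e-3, random_state=1))
# benchmark(pipe, X_train, y_train)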
Code Example #22
def summarizeDocuments(documents, stopwords, useTfIdfSimilarity,
                       useSentimentSimilarity, useWordModel, usePageRank,
                       useAggregateClustering, length, anaphoraResolution,
                       alphaValueForPagerank, alphaValueForMMR, debugMode):

    documents = preprocess(documents, femaleNamesFileName, maleNamesFileName,
                           anaphoraResolution, debugMode)
    sentenceSimilarities = getTfIdfValues(documents, stopwords)

    matrices = list()
    flat_sentences = [
        sentence for document in documents for sentence in document
    ]

    if useSentimentSimilarity:
        positiveWords = getPositiveWords(positiveSentimentFileName)
        negativeWords = getnegativeWords(negativeSentimentFileName)
        (pos, neg) = analyzeSentiment(flat_sentences, positiveWords,
                                      negativeWords, debugMode)
        matrices.append(pos)
        matrices.append(neg)

    if useWordModel:
        word2Vec = getWordToVectorMatrix(flat_sentences, debugMode)
        matrices.append(word2Vec)

    if useTfIdfSimilarity or len(matrices) == 0:
        matrices.append(sentenceSimilarities["tfidf_cosine"])
    # Combine the selected matrices by element-wise multiplication
    aggregateSimilarityMatrix = calcAggregateSimMatrix(matrices)

    # Choose between the PageRank and the clustering algorithm
    if usePageRank:
        output = usePageRankImplementation(documents, aggregateSimilarityMatrix,
                                           length, alphaValueForPagerank,
                                           alphaValueForMMR, debugMode)
    else:
        output = useClusteringAlgorithm(documents, aggregateSimilarityMatrix,
                                        length, sentenceSimilarities,
                                        useAggregateClustering)
    return output
Code Example #23
File: sv2_train.py Project: timothymillar/SV2
def main():
	init_time = int(time())
	parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter,usage=splash.replace("       ","",1)+__useage___,add_help=False)
	inArgs,genoArgs,optArgs = parser.add_argument_group('input arguments'),parser.add_argument_group('genotype arguments'),parser.add_argument_group('optional arguments')
	inArgs.add_argument('-i','-bam',type=str,default=None,nargs='*')
	inArgs.add_argument('-b','-bed',type=str,default=None,nargs='*')
	inArgs.add_argument('-v','-vcf',type=str,default=None,nargs='*')
	inArgs.add_argument('-snv',type=str,default=None,nargs='*')
	inArgs.add_argument('-p','-ped',type=str,default=None,nargs='*')
	genoArgs.add_argument('-g','-genome',required=False,default='hg19',type=str)
	genoArgs.add_argument('-pcrfree',required=False,default=False,action="store_true")
	genoArgs.add_argument('-M',default=False,required=False,action="store_true")
	genoArgs.add_argument('-pre',required=False,default=None)
	genoArgs.add_argument('-feats',required=False,default=None)
	optArgs.add_argument('-L','-log',default=None,required=False)
	optArgs.add_argument('-T','-tmp-dir',default=os.getcwd()+'/sv2_tmp_'+rand_id(),required=False)
	optArgs.add_argument('-s','-seed',required=False,default=42,type=int)
	optArgs.add_argument('-o','-out',required=False,default="sv2_training_features",type=str)
	optArgs.add_argument('-O','-odir',required=False,default=os.getcwd(),type=str)
	optArgs.add_argument('-h','-help',required=False,action="store_true",default=False)
	args = parser.parse_args()
	bams,bed,vcf,snv,ped = args.i,args.b,args.v,args.snv,args.p
	gen,pcrfree,legacy_m,predir,featsdir= args.g,args.pcrfree,args.M,args.pre,args.feats
	logfh, tmp_dir, seed, ofh, odir = args.L,args.T,args.s,args.o,args.O
	_help = args.h
	if (_help==True or len(sys.argv)==1):
		print splash+__useage___
		sys.exit(0)
	if logfh!=None:
		lfh = open(logfh,'w')
		sys.stderr=lfh
	preprocess_files,feats_files={},{}
	gens = ['hg19','hg38','mm10']
	olog = logfh
	if olog == None: olog = 'STDOUT'
	print 'sv2 version:{}    report bugs to <dantaki at ucsd dot edu>       error messages located in {}'.format(__version__,olog)
	Confs=Config()
	if bams==None and predir==None and featsdir==None:
		print 'FATAL ERROR: No BAM file specified <-i, -bam  FILE ...>'
		sys.stderr.write('FATAL ERROR: No BAM file specified <-i, -bam  FILE ...>\n')
		sys.exit(1)
	if snv==None and predir==None and featsdir==None:
		print 'FATAL ERROR: No SNV VCF file specified <-snv  FILE ...>'
		sys.stderr.write('FATAL ERROR: No SNV VCF file specified <-snv  FILE ...>\n')
		sys.exit(1)
	if ped==None:
		print 'FATAL ERROR: No PED file specified <-p, -ped  FILE ...>'
		sys.stderr.write('FATAL ERROR: No PED file specified <-p, -ped  FILE ...>\n')
		sys.exit(1)
	if bed==None and vcf==None:
		print 'FATAL ERROR: No SVs provided <-b, -bed  BED ...> <-v,-vcf  VCF ...>'
		sys.stderr.write('FATAL ERROR: No SVs provided <-b, -bed  BED ...> <-v,-vcf  VCF ...>\n')
		sys.exit(1)
	if gen not in gens:
		print 'FATAL ERROR -g must be hg19 or hg38. NOT {}'.format(gen)
		sys.stderr.write('FATAL ERROR -g must be hg19 or hg38. NOT {}\n'.format(gen))
		sys.exit(1)
	Peds=ped_init(ped)
	if bams!=None: Bams=bam_init(bams,Peds,snv_init(snv),gen)
	SV = sv_init(bed,vcf,gen)
	ofh = ofh.replace('.vcf','').replace('.out','').replace('.txt','')
	make_dir(tmp_dir)
	tmp_dir=slash_check(tmp_dir)
	if not odir.endswith('/'): odir = odir+'/'
	make_dir(odir)
	"""
	PREPROCESSING
	"""
	if predir == None:
		outdir = odir+'sv2_preprocessing/'
		make_dir(outdir)
		for bam in Bams:
			preofh = outdir+bam.id+'_sv2_preprocessing.txt'
			preprocess_files[bam.id]=preofh
			preprocess(bam,preofh,seed,gen,tmp_dir)
	else:
		predir=slash_check(predir)
		for fh in glob(predir+'*sv2_preprocessing.txt'):
			f = open(fh)
			if sum(1 for l in open(fh)) <= 1: continue
			else:
				preids=[]
				for l in f:
					if l.startswith('#'):continue
					preids.append(l.rstrip('\n').split('\t').pop(0))
			f.close()
			for iid in set(preids):
				if iid in Peds.ids : preprocess_files[iid]=fh
	report_time(init_time,'PREPROCESSING COMPLETE')
	""""
	FEATURE EXTRACTION
	"""
	if featsdir == None:
		outdir = odir+'sv2_features/'
		make_dir(outdir)
		for bam in Bams:
			if preprocess_files.get(bam.id) == None:
				sys.stderr.write('WARNING: BAM sample id {} not found in preprocessing files. Skipping ...\n'.format(bam.id))
				continue
			prefh = preprocess_files[bam.id]
			featfh = outdir+bam.id+'_sv2_features.txt'
			feats_files[bam.id]=featfh
			extract_feats(bam,SV.raw,prefh,featfh,gen,pcrfree,legacy_m,Confs,tmp_dir)
	else:
		featsdir=slash_check(featsdir)
		for fh in glob(featsdir+'*sv2_features.txt'):
			f = open(fh)
			if sum(1 for l in open(fh)) <= 1: continue
			else:
				featsid=[]
				for l in f:
					if l.startswith('#'):continue
					featsid.append(l.rstrip('\n').split('\t').pop(5))
			f.close()
			for iid in set(featsid):
				if iid in Peds.ids : feats_files[iid]=fh
	feats=[]
	train_dir = odir+'sv2_training_features/'
	make_dir(train_dir)
	for iid in feats_files:
		with open(feats_files[iid]) as f:
			for l in f: feats.append(tuple(l.rstrip('\n').split('\t')))
	sv2_train_output(feats,Peds,gen,train_dir+ofh)
	shutil.rmtree(tmp_dir)
	if logfh!=None: lfh.close()
	report_time(init_time,'FEATURE EXTRACTION COMPLETE')
Code Example #24
def train(args):
    # Data preprocessing: build the vocab and data files
    preprocess(args['cap_path'], args['vocab_path'], args['data_path'])

    if not os.path.exists(args['model_path']):
        os.mkdir(args['model_path'])

    # Image transforms, with data augmentation
    transform = transforms.Compose([
        transforms.Resize((args['resize'], args['resize'])),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)

    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    data_loader = get_loader(args['train_img_path'],
                             Data,
                             vocab,
                             transform,
                             args['batch_size'],
                             shuffle=True,
                             num_workers=args['num_workers'])

    encoder = Encoder(args['embed_size'], args['pooling_kernel']).cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args['learning_rate'])

    total_step = len(data_loader)
    for epoch in range(args['num_epochs']):
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = images.cuda()
            captions = captions.cuda()
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print training progress
            if i % args['log_step'] == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args['num_epochs'], i, total_step,
                            loss.item(), np.exp(loss.item())))

            # Periodically save model checkpoints
            if (i + 1) % args['save_step'] == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args['model_path'],
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args['model_path'],
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))

        # Also save the model at the end of each epoch
        torch.save(
            decoder.state_dict(),
            os.path.join(args['model_path'],
                         'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
        torch.save(
            encoder.state_dict(),
            os.path.join(args['model_path'],
                         'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
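The targets tensor is built with pack_padded_sequence so the loss never sees padding tokens. A minimal illustration of what the packing returns (lengths must be in descending order here):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

# Two padded caption rows with true lengths 3 and 2
captions = torch.tensor([[1, 2, 3],
                         [4, 5, 0]])
packed = pack_padded_sequence(captions, [3, 2], batch_first=True)
print(packed.data)  # tensor([1, 4, 2, 5, 3]): padding dropped, time-major order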
Code Example #25
def detectPlatesInScene(imgOriginalScene,
                        PreprocessGaussKernel, PreprocessThreshBlockSize, PreprocessThreshweight, PreprocessMorphKernel,
                        PlateWidthPaddingFactor, PlateHeightPaddingFactor,
                        MinPixelWidth, MaxPixelWidth, MinPixelHeight, MaxPixelHeight, MinAspectRatio, MaxAspectRatio, MinPixelArea, MaxPixelArea,
                        MaxDiagSizeMultipleAway, MinNumberOfMatchingChars, MaxNumberOfMatchingChars, MinAngleBetweenChars, MaxAngleBetweenChars,
                        MinChangeInArea, MaxChangeInArea, MinChangeInWidth, MaxChangeInWidth, MinChangeInHeight, MaxChangeInHeight, debugMode):
    """ License Plate Detection in a given input image scene, using geometrical analysis techniques """

    # Pre-processing (CSC --> contrast --> blur --> threshold):
    imgGrayscaleScene, imgThreshScene = preprocess(imgOriginalScene,
                                                   PreprocessGaussKernel,
                                                   PreprocessThreshBlockSize,
                                                   PreprocessThreshweight,
                                                   PreprocessMorphKernel)

    # Find all possible characters in the scene (finds all contours that could be characters, w/o OCR yet):
    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene,
                                                          MinPixelWidth, MaxPixelWidth,
                                                          MinPixelHeight, MaxPixelHeight,
                                                          MinAspectRatio, MaxAspectRatio,
                                                          MinPixelArea, MaxPixelArea,
                                                          debugMode)

    # Given a list of all possible chars, find groups of matching characters (later on, each group will attempt to be recognized as a plate):
    listOfListsOfMatchingCharsInScene = findListOfListsOfMatchingChars(listOfPossibleCharsInScene,
                                                                       MinNumberOfMatchingChars,
                                                                       MaxNumberOfMatchingChars,
                                                                       MinAngleBetweenChars, MaxAngleBetweenChars,
                                                                       MinChangeInArea, MaxChangeInArea,
                                                                       MinChangeInWidth, MaxChangeInWidth,
                                                                       MinChangeInHeight, MaxChangeInHeight,
                                                                       MaxDiagSizeMultipleAway)

    # For each group of matching chars, attempt to extract plate:
    listOfPossiblePlates = []
    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:

        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars,  PlateWidthPaddingFactor, PlateHeightPaddingFactor)

        # Add plate to list of possible plates (if found):
        if possiblePlate.imgPlate is not None:
            listOfPossiblePlates.append(possiblePlate)

    info("%d possible plates found" % len(listOfPossiblePlates))

    # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
    if debugMode:

        height, width, _ = imgOriginalScene.shape

        # Original image:
        imwrite("img_original.jpg", imgOriginalScene)

        # Pre-processing images:
        imwrite("img_gray.jpg", imgGrayscaleScene)
        imwrite("img_threshold.jpg", imgThreshScene)

        # Possible characters in image:
        imgContours = zeros((height, width, 3), uint8)
        contours = []
        for possibleChar in listOfPossibleCharsInScene:
            contours.append(possibleChar.contour)
        drawContours(imgContours, contours, -1, Colors.white)
        imwrite("img_contours_possible_chars.jpg", imgContours)

        # Matching characters:
        imgContours = zeros((height, width, 3), uint8)
        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            intRandomBlue = randint(0, 255)
            intRandomGreen = randint(0, 255)
            intRandomRed = randint(0, 255)
            contours = []
            for matchingChar in listOfMatchingChars:
                contours.append(matchingChar.contour)
            drawContours(imgContours, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))
            imwrite("img_contours_matching_chars.jpg", imgContours)

        # Possible license-plates:
        for i in range(0, len(listOfPossiblePlates)):
            p2fRectPoints = boxPoints(listOfPossiblePlates[i].rrLocationOfPlateInScene)
            line(imgContours, tuple(p2fRectPoints[0]), tuple(p2fRectPoints[1]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[1]), tuple(p2fRectPoints[2]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[2]), tuple(p2fRectPoints[3]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[3]), tuple(p2fRectPoints[0]), Colors.red, 2)
            imwrite("img_contours_possible_plates_%d.jpg" % i, imgContours)
            imwrite("img_plate_%d.jpg" % i, listOfPossiblePlates[i].imgPlate)

        debug("Plate detection complete", True)

    return listOfPossiblePlates
Code Example #26
    ap = argparse.ArgumentParser(add_help=False)
    ap.add_argument('-c', '--content', required=True)
    ap.add_argument('-s', '--style', required=True)
    ap.add_argument('-a', '--alpha', default=1e-3)
    ap.add_argument('-b', '--beta', default=1.0)
    ap.add_argument('-e', '--steps', default=300)
    ap.add_argument('-h', '--img_h', default=512)
    ap.add_argument('-w', '--img_w', default=512)
    ap.add_argument('-o', '--output', default='./outputs/')
    ap.add_argument('-d', '--display', default=False)
    ap.add_argument('-n', '--name', required=True)

    args = vars(ap.parse_args())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    content, style = preprocess(args['content'], args['style'], args['img_h'],
                                args['img_w'])

    content = content.to(device)
    style = style.to(device)
    neural_style = Neural_Style(content, style)
    neural_style.to(device)

    steps = int(args['steps'])
    LBFGS = torch.optim.LBFGS([neural_style.target])

    alpha = float(args['alpha'])
    beta = float(args['beta'])

    i = 0
    while i <= steps:
Code Example #27
    def reset_one(self, i):
        reset_state = self.env[i].reset()
        reset_state = preprocess(reset_state)
        # Stack group_size copies of the reset frame along the channel axis
        reset_state = np.concatenate([reset_state for k in range(self.group_size)], axis=-1)
        return reset_state
Code Example #28
for image_name in files:
    start_time = time.time()
    _, image = os.path.split(image_name)

    image_name_base = os.path.splitext(image)[0]
    output_image_directory = os.path.abspath(
        os.path.join(output_directory, image_name_base))

    remove_directory(output_image_directory)
    ensure_directory(output_image_directory)

    # Preprocess image
    print("")
    print("Processing " + image)
    preprocessed_image = preprocess(image_name,
                                    output_image_directory,
                                    runmode=runmode)
    print("Finished preprocessing " + image)

    print("    ****    ")

    # Segment preprocessed image
    print("Segmenting " + image)
    words_li_li = segment(preprocessed_image,
                          output_image_directory,
                          runmode=runmode)
    print("Finished segmenting " + image)

    print("    ****    ")

    # Classify segmented image
Code Example #29
File: DetectChars.py Project: Bilalkhanten/LPR
def detectCharsInPlates(listOfPossiblePlates, PreprocessGaussKernel, PreprocessThreshBlockSize, PreprocessThreshweight,
                        PreprocessMorphKernel, MinPixelWidth, MaxPixelWidth, MinPixelHeight, MaxPixelHeight,
                        MinAspectRatio, MaxAspectRatio, MinPixelArea, MaxPixelArea, MinDiagSizeMultipleAway, MaxDiagSizeMultipleAway,
                        MinNumberOfMatchingChars, MaxNumberOfMatchingChars, MinAngleBetweenChars, MaxAngleBetweenChars,
                        MinChangeInArea, MaxChangeInArea, MinChangeInWidth, MaxChangeInWidth, MinChangeInHeight,
                        MaxChangeInHeight, ResizedCharImageWidth, ResizedCharImageHeight, kNearest, DebugMode):
    """ Detect characters in the pre-detected plate (OCR analysis, over KNN engine) """

    # Early break condition (empty input):
    if len(listOfPossiblePlates) == 0:
        return listOfPossiblePlates

    # For each possible plate --> preprocess, find all characters, try to group them, remove overlaps and perform OCR:
    intPlateCounter = 0
    longestListOfMatchingCharsInPlate = []
    for possiblePlate in listOfPossiblePlates:

        # Pre-processing (CSC --> contrast --> blur --> threshold):
        possiblePlate.imgGrayscale, imgThreshScene = preprocess(possiblePlate.imgPlate,
                                                                PreprocessGaussKernel,
                                                                PreprocessThreshBlockSize,
                                                                PreprocessThreshweight,
                                                                PreprocessMorphKernel)

        # Increase size of plate image for easier viewing and char detection
        possiblePlate.imgThresh = resize(imgThreshScene, (0, 0), fx=1.6, fy=1.6)

        # Threshold again to eliminate any gray areas:
        _, possiblePlate.imgThresh = threshold(possiblePlate.imgThresh, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU)

        # Find all possible chars in the plate (finds all contours that could be chars):
        listOfPossibleCharsInPlate = findPossibleCharsInPlate(possiblePlate.imgThresh,
                                                              MinPixelWidth, MaxPixelWidth,
                                                              MinPixelHeight, MaxPixelHeight,
                                                              MinAspectRatio, MaxAspectRatio,
                                                              MinPixelArea, MaxPixelArea)

        # Given a list of all possible chars, find groups of matching chars within the plate:
        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(listOfPossibleCharsInPlate,
                                                                           MinNumberOfMatchingChars, MaxNumberOfMatchingChars,
                                                                           MinAngleBetweenChars, MaxAngleBetweenChars,
                                                                           MinChangeInArea, MaxChangeInArea,
                                                                           MinChangeInWidth, MaxChangeInWidth,
                                                                           MinChangeInHeight, MaxChangeInHeight,
                                                                           MaxDiagSizeMultipleAway)

        # If groups of matching chars were found in the plate:
        if len(listOfListsOfMatchingCharsInPlate) > 0:

            # Within each list of matching chars, sort chars from left to right and remove inner overlapping chars:
            for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
                listOfListsOfMatchingCharsInPlate[i].sort(key=lambda tmpMatchingChar: tmpMatchingChar.intCenterX)
                listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(listOfListsOfMatchingCharsInPlate[i],
                                                                                   MinDiagSizeMultipleAway)

            # Within each possible plate, loop through all the vectors of matching chars, get the index of the one with the most chars:
            intLenOfLongestListOfChars = 0
            intIndexOfLongestListOfChars = 0
            for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
                if len(listOfListsOfMatchingCharsInPlate[i]) > intLenOfLongestListOfChars:
                    intLenOfLongestListOfChars = len(listOfListsOfMatchingCharsInPlate[i])
                    intIndexOfLongestListOfChars = i

            # Suppose that the longest list of matching chars within the plate is the actual list of chars:
            longestListOfMatchingCharsInPlate = listOfListsOfMatchingCharsInPlate[intIndexOfLongestListOfChars]

            # Characters recognition (OCR):
            possiblePlate.strChars = recognizeCharsInPlate(possiblePlate.imgThresh,
                                                           longestListOfMatchingCharsInPlate,
                                                           ResizedCharImageWidth,
                                                           ResizedCharImageHeight,
                                                           kNearest,
                                                           intPlateCounter,
                                                           DebugMode)

        # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
        if DebugMode:

            height, width, _ = possiblePlate.imgPlate.shape
            contours1 = []; imgContours1 = zeros((height, width, 3), uint8)
            contours2 = []; imgContours2 = zeros((height, width, 3), uint8)
            contours3 = []; imgContours3 = zeros((height, width, 3), uint8)
            contours4 = []; imgContours4 = zeros((height, width, 3), uint8)

            for possibleChar in listOfPossibleCharsInPlate:
                contours1.append(possibleChar.contour)
            drawContours(imgContours1, contours1, -1, Colors.white)

            if len(listOfListsOfMatchingCharsInPlate) > 0:

                for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                    intRandomBlue = randint(0, 255)
                    intRandomGreen = randint(0, 255)
                    intRandomRed = randint(0, 255)
                    for matchingChar in listOfMatchingChars:
                        contours2.append(matchingChar.contour)
                    drawContours(imgContours2, contours2, -1, (intRandomBlue, intRandomGreen, intRandomRed))

                for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                    intRandomBlue = randint(0, 255)
                    intRandomGreen = randint(0, 255)
                    intRandomRed = randint(0, 255)
                    for matchingChar in listOfMatchingChars:
                        contours3.append(matchingChar.contour)
                    drawContours(imgContours3, contours3, -1, (intRandomBlue, intRandomGreen, intRandomRed))

                for matchingChar in longestListOfMatchingCharsInPlate:
                    contours4.append(matchingChar.contour)
                drawContours(imgContours4, contours4, -1, Colors.white)

            imwrite("img_possible_plate_%d.jpg" % intPlateCounter, possiblePlate.imgPlate)
            imwrite("img_possible_plate_gray_%d.jpg" % intPlateCounter, possiblePlate.imgGrayscale)
            imwrite("img_possible_plate_threshold_scene_%d.jpg" % intPlateCounter, imgThreshScene)
            imwrite("img_possible_plate_threshold_%d.jpg" % intPlateCounter, possiblePlate.imgThresh)
            imwrite("img_possible_plate_contours1_%d.jpg" % intPlateCounter, imgContours1)
            if len(listOfListsOfMatchingCharsInPlate) > 0:
                imwrite("img_possible_plate_contours2_%d.jpg" % intPlateCounter, imgContours2)
                imwrite("img_possible_plate_contours3_%d.jpg" % intPlateCounter, imgContours3)
                imwrite("img_possible_plate_contours4_%d.jpg" % intPlateCounter, imgContours4)

            if len(listOfListsOfMatchingCharsInPlate) > 0:
                debug("Characters found in plate number #%d = %s" % (intPlateCounter, possiblePlate.strChars), True)
            else:
                debug("Characters found in plate number #%d = (none)" % intPlateCounter, True)
            intPlateCounter = intPlateCounter + 1

        # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
        # If no groups of matching chars were found in the plate, continue for next plate candidate:
        if len(listOfListsOfMatchingCharsInPlate) == 0:

            possiblePlate.strChars = ""
            continue

    if DebugMode:
        debug("Characters detection complete", True)

    return listOfPossiblePlates
Code Example #30
    def preprocess(self):
        """ Preprocessing phase to execute boundary finding, reordering and
        equation calculation sequentially. """
        self._preprocessRan = True
        self._node_reorder2, self._reorder_E, self._L_inv, self._U_inv, self._L_k_inv, self._U_k_inv, self._boundary_start_number, self._index_start, self._T1, self._T2 = preprocess(
            self._partition_list, self._nparts, self._n, self._E)