Example #1
def text_rank(data):
    """Applies the TextRank algorithm to the text passed as a parameter and returns a summary."""

    sentences = tokenize(data)
    n = len(sentences)
    clean_sentences = preprocess(sentences)
    sentence_vectors = vectorize(clean_sentences)

    # Compute the pairwise cosine-similarity matrix (sentence vectors are 50-dimensional)
    s_mat = np.zeros([n, n])
    for i in range(n):
        for j in range(n):
            if i != j:
                s_mat[i][j] = cosine_similarity(
                    sentence_vectors[i].reshape(1, 50),
                    sentence_vectors[j].reshape(1, 50))[0, 0]

    # Apply PageRank to the similarity graph to score sentences
    graph = nx.from_numpy_array(s_mat)
    sentence_scores = nx.pagerank(graph)

    ranked_sentences = sorted(
        ((sentence_scores[i], s) for i, s in enumerate(sentences)),
        reverse=True)

    # Keep the top 20% of sentences as the summary
    n_summary = round(n * 0.2)
    summary = [ranked_sentences[i][1] for i in range(n_summary)]
    return clean_summary(summary)
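For reference, the similarity-matrix-to-PageRank step above can be reproduced in isolation. A minimal, self-contained sketch with toy sentence vectors (numpy and networkx only; tokenize, vectorize, and clean_summary are assumed to exist elsewhere):

import networkx as nx
import numpy as np

vecs = np.random.rand(4, 50)  # four toy 50-dimensional sentence vectors
norm = vecs / np.linalg.norm(vecs, axis=1, keepdims=True)
s_mat = norm @ norm.T         # cosine similarity via normalized dot products
np.fill_diagonal(s_mat, 0.0)  # zero out self-similarity, as in the loop above
scores = nx.pagerank(nx.from_numpy_array(s_mat))
print(sorted(scores, key=scores.get, reverse=True))  # sentence indices by rank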
Example #2
    def video_stream(self):

        ret, frame = self.cap.read()
        if not ret:  # no frame available yet; re-schedule and try again
            self.video_label.after(1, self.video_stream)
            return
        frame = cv2.flip(frame, 1)

        hand_img = preprocess(frame)
        probs_class_map, self.prediction = self.predict(hand_img)

        if len(self.current_word) != 0 and self.prediction == 'blank':
            self.blank_count += 1
            if self.blank_count > 80:
                self.sentence += ' '
                self.sentence += self.current_word
                self.current_word = ''
                for i in classes:
                    self.count[i] = 0
                self.blank_count = 0

        elif self.prediction != 'blank':
            self.count[self.prediction] += 1
            if self.count[self.prediction] > 50:
                self.current_word += self.prediction
                for i in classes:
                    self.count[i] = 0
                self.blank_count = 0
                self.plot(probs_class_map)

        self.updateDepositLabel(self.prediction, self.current_word,
                                self.sentence)
        cv2Img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
        img = Image.fromarray(cv2Img)
        imgtk = ImageTk.PhotoImage(image=img)
        self.video_label.imgtk = imgtk
        self.video_label.configure(image=imgtk)
        self.video_label.after(1, self.video_stream)
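The key pattern here is that video_stream re-schedules itself with after(1, ...), so frames are processed without blocking the Tk event loop. A minimal standalone sketch of that polling loop, with a counter standing in for the camera:

import tkinter as tk

root = tk.Tk()
label = tk.Label(root, text="0")
label.pack()

def tick(counter=[0]):
    counter[0] += 1                 # stand-in for grabbing and showing a frame
    label.configure(text=str(counter[0]))
    label.after(1, tick)            # re-schedule, exactly as video_stream does

tick()
root.mainloop()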
Example #3
    def step(self, action):
        next_state = [[] for empty in range(self.group_size)]
        reward_sum = np.zeros(self.batch_size)

        for i in range(self.group_size):
            outcomes = [env.step(act) for env, act in zip(self.env, action)]

            cols = [[], [], [], []]  # next_state, reward, done, info

            for j in range(self.batch_size):
                one_step = outcomes[j]
                for col, value in zip(cols, one_step):
                    col.append(value)
            cols = [np.array(col) for col in cols]

            cols[0] = np.array([preprocess(cols[0][k]) for k in range(self.batch_size)])
            next_state[i].append(cols[0])
            reward_sum += cols[1]
        # Now next_state has shape (group_size, 1, batch_size, 88, 80, 1)
        # So reshape to (group_size, batch_size, 88, 80, 1)
        # Split them, stack them to get (batch_size, 88, 80, group_size)
        next_state = np.reshape(next_state, [self.group_size, self.batch_size, 88, 80, 1])
        split_states = [next_state[k] for k in range(self.group_size)]
        next_state = np.array(np.concatenate(split_states, axis=-1))

        return next_state, reward_sum, cols[2], cols[3]
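A shape-only sketch of the reshape-and-concatenate logic above, with toy sizes (numpy only; the comments in step describe the same (group_size, 1, batch_size, 88, 80, 1) to (batch_size, 88, 80, group_size) transformation):

import numpy as np

group_size, batch_size, H, W = 4, 2, 88, 80
states = np.zeros([group_size, 1, batch_size, H, W, 1])
states = np.reshape(states, [group_size, batch_size, H, W, 1])
stacked = np.concatenate([states[k] for k in range(group_size)], axis=-1)
print(stacked.shape)  # (2, 88, 80, 4)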
Example #4
def main(neval=30,
         nfolds=5,
         ncvjobs=1,
         nreps=5,
         kbest=None,
         ngram_hi=3,
         jobs=1,
         seed=1,
         *event_sel):
    print(locals())
    #return
    #event_sel=[70, 71]
    #nreps=1
    df = read_train()
    X, y = preprocess(df, event_sel=event_sel, ngrams=(2, ngram_hi))

    best = evaluate_hyper(
        X,
        y,
        hyper_objective,
        neval=neval,
        nfolds=nfolds,
        ncvjobs=ncvjobs,
        nreps=nreps,
        nbest=kbest,
        njobs=jobs,
        seed=seed,
    )

    print('Final best: {}'.format(best))

    return
Example #5
def FinalIndexDoc2Vec(final_query, desc):
    desc = [preprocess(i) for i in desc if i != '' and len(i.split()) > 10]
    sentences = []
    for item_no, line in enumerate(desc):
        sentences.append(LabeledSentence(line, [item_no]))
    dm = 1
    size = 300
    context_window = 50
    seed = 42
    min_count = 1
    alpha = 0.5
    max_iter = 200
    model = gensim.models.doc2vec.Doc2Vec(documents=sentences,
                                          dm=dm,
                                          alpha=alpha,
                                          seed=seed,
                                          min_count=min_count,
                                          max_vocab_size=None,
                                          window=context_window,
                                          size=size,
                                          sample=1e-4,
                                          negative=5,
                                          iter=max_iter)
    tokens = final_query.split()
    new_vector = model.infer_vector(tokens)
    sims = model.docvecs.most_similar([new_vector], topn=10)
    refined = [i[0] for i in sims if i[1] > 0]
    return refined
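Note that this example targets the pre-4.0 gensim API. Since gensim 4.0, LabeledSentence is replaced by TaggedDocument and the size/iter parameters are renamed; a minimal sketch of the equivalent setup on toy documents (not a drop-in replacement):

from gensim.models.doc2vec import Doc2Vec, TaggedDocument

docs = [TaggedDocument(words, [i])
        for i, words in enumerate([["deep", "learning"], ["text", "search"]])]
model = Doc2Vec(documents=docs, dm=1, vector_size=300, window=50, seed=42,
                min_count=1, sample=1e-4, negative=5, epochs=20)
new_vector = model.infer_vector(["deep", "search"])
sims = model.dv.most_similar([new_vector], topn=2)  # docvecs is now model.dv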
Example #6
    def on_status(self, status):
        tweet = str(status.text).lower()
        if any(i in tweet for i in targetWords):
            preprocessedtweet = preprocess(tweet)
            predictedlabel = predictInterest([preprocessedtweet], NBClassifier,
                                             bestCount_Vectorizer)
            print(tweet)
            print(label[predictedlabel[0]])
Example #7
def predict(image):

    pp_image = preprocess(image)
    pp_image = pp_image.reshape(-1, 45, 45, 1)

    prediction = model.predict(pp_image)
    cls = classes[str(np.argmax(prediction))]

    return cls
Example #8
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.linear_model import Perceptron
    clf = Perceptron(max_iter=50, tol=1e-3, random_state=1)

    return benchmark(clf, X_train, y_train)
Example #9
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.neighbors import KNeighborsClassifier
    clf = KNeighborsClassifier(n_neighbors=10)

    return benchmark(clf, X_train, y_train)
Example #10
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.linear_model import RidgeClassifier
    clf = RidgeClassifier(tol=1e-2, solver="sag", random_state=1)

    return benchmark(clf, X_train, y_train)
Example #11
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.neighbors import NearestCentroid
    clf = NearestCentroid()

    return benchmark(clf, X_train, y_train)
Example #12
def main(event_sel=None):
    df = read_train()

#    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
#    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.naive_bayes import BernoulliNB, MultinomialNB
    clf = BernoulliNB(alpha=.01)

    return benchmark(clf, X_train, y_train)
Example #13
    def reset_all(self):
        """ Returns a stack of 4 copies of the original reset state for
        each runner: return shape is (batch_size, 88, 80, group_size)"""
        reset_env = [env.reset() for env in self.env]
        # (64, 210, 163, 3)
        reset_env = np.array([preprocess(reset_env[i]) for i in range(self.batch_size)])
        # (64, 88, 80, 1)
        reset_env_stack = [reset_env for k in range(self.group_size)]
        # (4, 64, 88, 80, 1)
        reset_env = np.concatenate(reset_env_stack, axis=-1)
        # (64, 88, 80, 4)
        return reset_env
Example #14
def preprocess_search(keyword):
    corpus = df.columns
    keyword = preprocess(keyword)
    keyword = make_bigrams(keyword)
    search_words = list()

    for word in keyword:
        if '*' in word:
            search_words.extend(fnmatch.filter(corpus, word))
        else:
            search_words.extend(difflib.get_close_matches(word, corpus))
    return search_words
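A standalone sketch of the matching strategy above, using only the standard library: fnmatch handles wildcard patterns, while difflib falls back to fuzzy matching (toy corpus in place of df.columns):

import difflib
import fnmatch

corpus = ["revenue_2020", "revenue_2021", "profit_2021"]
for word in ["revenue_*", "profit_2012"]:
    if '*' in word:
        print(fnmatch.filter(corpus, word))             # wildcard match
    else:
        print(difflib.get_close_matches(word, corpus))  # fuzzy match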
Example #15
def FinalIndexJaccard(final_query, desc):
    desc = [preprocess(i) for i in desc if i != '' and len(i.split()) > 10]
    list_indx = []
    for indx, i in enumerate(desc):
        dict_indx = {}
        dict_indx['index'] = indx
        dict_indx['similarity'] = 1 - distance.jaccard(final_query, i)
        if dict_indx['similarity'] > .5:
            list_indx.append(dict_indx)
    # Sort by similarity and return the original description indices
    # (not positions within the filtered list)
    list_indx.sort(key=lambda d: d['similarity'], reverse=True)
    refined = [d['index'] for d in list_indx[:10]]
    return refined
Example #16
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=event_sel)
    #    X_train, y_train = preprocess(df, event_sel=[31, 78])
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42, 55, 11])
    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])

    from lightgbm import LGBMClassifier
    clf = LGBMClassifier(verbose=1, random_state=1, silent=0, n_estimators=400)

    return benchmark(clf, X_train.astype(float), y_train)
Example #17
def predict(input_path, output_path, resources_path):

    # The prediction starts by loading the vocabularies that were used during the training phase
    vocab = dict()
    vocabu = dict()
    with open('../resources/vocab.pkl', 'rb') as f:
        vocab = pickle.load(f)
    with open('../resources/vocabu.pkl', 'rb') as f:
        vocabu = pickle.load(f)

    # Pass the whole test dataset through the preprocessing phase and save the gold data
    # to a file. (If the input file contains whitespace, a gold dataset can be recovered;
    # otherwise this just generates a throwaway file that can be ignored.)

    TInput, TLabel, TFullline, TFulllabelline, lens, senlens = preprocess(
        input_path)
    X_testu, X_testb, Y_test, vocabt, vocabut = buildvector(
        TInput, TLabel, TFullline, TFulllabelline, vocab, vocabu, lens)
    savegoldtofile(TLabel)

    # Initialize the model and load the weights and configuration from file
    model = create_keras_model((len(vocab) + 1), (len(vocabu) + 1), 256)
    model = load_model(resources_path)

    # Proceed with the prediction: feed the X vectors to the predict function and get back
    # one-hot encoded vectors. Reverse the encoding with argmax to retrieve the numeric
    # class, then assign the corresponding label. Finally, remove the padding added during
    # preprocessing and save the file.

    prediction = model.predict([X_testu, X_testb])
    text_file = open(output_path, "w+")
    sen = []
    count = 0
    for row in prediction:

        line = []
        for element in row:
            val = np.argmax(element)
            if val == 0:
                line.extend("B")
            elif val == 1:
                line.extend("E")
            elif val == 2:
                line.extend("I")
            else:
                line.extend("S")
        linez = ''.join(line)
        linez2 = linez[:senlens[count]]
        sen.append(linez2)
        text_file.write(linez2 + '\n')
        count += 1

    text_file.close()
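The decoding loop above reverses the model's one-hot output with argmax and maps class indices to BIES labels. A minimal sketch of that mapping on a toy probability row, assuming the same class order (0:B, 1:E, 2:I, 3:S):

import numpy as np

LABELS = "BEIS"
row = np.array([[0.9, 0.0, 0.1, 0.0],
                [0.1, 0.7, 0.1, 0.1]])
decoded = ''.join(LABELS[int(np.argmax(p))] for p in row)
print(decoded)  # "BE"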
Example #18
def QueryProcess(query):
    data = Data(path)
    cleaned_query = preprocess(query)
    txtn = nlp(cleaned_query)
    txtp = nlp(query)
    # Renamed from "np" to avoid shadowing the numpy import
    noun_phrases = [chunk.text for chunk in txtn.noun_chunks]
    ner = [ent.text for ent in txtp.ents]
    tokens = cleaned_query.split()
    keywords = [
        token.text for token in txtn if token.pos_ == 'VERB'
        or token.pos_ == 'ADJ' or token.pos_ == 'NOUN' or token.pos_ == 'PROPN'
    ]
    synonyms = list(
        set([
            l.name() for i in keywords for syn in wordnet.synsets(i)
            for l in syn.lemmas()
        ]))
    synonyms = [i for i in synonyms if '_' not in i]
    new_text = tokens + synonyms + noun_phrases + [i.lower() for i in ner] + [query]
    final_query = ' '.join(new_text)
    cleaned_data = [''.join(preprocess(i) + i) for i in data]
    doc = [final_query] + cleaned_data
    indx = indexLSA(doc)  # or indexTFIDF(doc)
    return final_query, indx, data
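A minimal sketch of the WordNet synonym expansion used above, isolated from the rest of the query pipeline (requires the nltk wordnet corpus to be downloaded):

from nltk.corpus import wordnet

def expand(terms):
    syns = {l.name() for t in terms
            for syn in wordnet.synsets(t) for l in syn.lemmas()}
    return [w for w in syns if '_' not in w]  # drop multi-word lemmas

print(expand(["search", "fast"]))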
Example #19
def main(event_sel=None):
    df = read_train()

    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])
    X_train, y_train = preprocess(df, event_sel=event_sel)

    from sklearn.linear_model import SGDClassifier
    clf = SGDClassifier(alpha=.0001,
                        max_iter=50,
                        tol=1e-3,
                        penalty='l2',
                        random_state=1)

    return benchmark(clf, X_train, y_train)
Example #20
def main(event_sel=None):
    df = read_train()

    X_train, y_train = preprocess(df, event_sel=event_sel)
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
    #    X_train, y_train = preprocess(df, event_sel=[71, 62, 42, 55, 11])
    #    X_train, y_train = preprocess(df, event_sel=[62, 63, 60])

    from sklearn.svm import LinearSVC
    clf = LinearSVC(loss='squared_hinge',
                    penalty='l2',
                    dual=False,
                    tol=1e-3,
                    verbose=0,
                    random_state=1)

    return benchmark(clf, X_train, y_train)
Example #21
def main(event_sel=None):
    df = read_train()

#    X_train, y_train = preprocess(df, event_sel=event_sel)
#    X_train, y_train = preprocess(df, event_sel=[71, 62, 42])
#    X_train, y_train = preprocess(df, event_sel=[71, 62, 42, 55, 11])
    X_train, y_train = preprocess(df, event_sel=[62, 63, 60], ngrams=(2,4))

    print('Extracting best features by a chi-squared test')
    from sklearn.feature_selection import SelectKBest, chi2
    ch2 = SelectKBest(chi2, k=12000)
    X_train = ch2.fit_transform(X_train, y_train)
    print('Extracting done, {}'.format(X_train.shape))

    from sklearn.svm import LinearSVC
    clf = LinearSVC(loss='squared_hinge', penalty='l2', dual=False, tol=1e-3, verbose=0, random_state=1)

    return benchmark(clf, X_train, y_train)
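A standalone sketch of the chi-squared feature selection step, on a toy corpus (sklearn only; k must not exceed the number of extracted features):

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import SelectKBest, chi2

texts = ["spam spam offer", "meeting notes today", "offer now", "notes later"]
y = [1, 0, 1, 0]
X = CountVectorizer().fit_transform(texts)
X_best = SelectKBest(chi2, k=3).fit_transform(X, y)
print(X_best.shape)  # (4, 3)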
Example #22
def summarizeDocuments(documents, stopwords, useTfIdfSimilarity,
                       useSentimentSimilarity, useWordModel, usePageRank,
                       useAggregateClustering, length, anaphoraResolution,
                       alphaValueForPagerank, alphaValueForMMR, debugMode):

    documents = preprocess(documents, femaleNamesFileName, maleNamesFileName,
                           anaphoraResolution, debugMode)
    sentenceSimilarities = getTfIdfValues(documents, stopwords)

    matrices = list()
    flat_sentences = [
        sentence for document in documents for sentence in document
    ]

    if useSentimentSimilarity:
        positiveWords = getPositiveWords(positiveSentimentFileName)
        negativeWords = getnegativeWords(negativeSentimentFileName)
        (pos, neg) = analyzeSentiment(flat_sentences, positiveWords,
                                      negativeWords, debugMode)
        matrices.append(pos)
        matrices.append(neg)

    if useWordModel:
        word2Vec = getWordToVectorMatrix(flat_sentences, debugMode)
        matrices.append(word2Vec)

    if useTfIdfSimilarity or len(matrices) == 0:
        matrices.append(sentenceSimilarities["tfidf_cosine"])
    # USE THE MATRICES LIST TO COMBINE THEM BY MULTIPLICATION ACCORDING TO SELECTION
    aggregateSimilarityMatrix = calcAggregateSimMatrix(matrices)

    # DETERMINE WHETHER PAGERANK OR CLUSTERING ALGORITHM
    if usePageRank:
        output = usePageRankImplementation(documents, aggregateSimilarityMatrix,
                                           length, alphaValueForPagerank,
                                           alphaValueForMMR, debugMode)
    else:
        output = useClusteringAlgorithm(documents, aggregateSimilarityMatrix,
                                        length, sentenceSimilarities,
                                        useAggregateClustering)
    return output
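The comment above says the matrices are combined by multiplication; calcAggregateSimMatrix itself is defined elsewhere, but an elementwise-product combination would look like this sketch (an assumption about its behavior, not the actual implementation):

import numpy as np

def aggregate_sim(matrices):
    out = np.ones_like(matrices[0])
    for m in matrices:
        out *= m          # elementwise product of similarity matrices
    return out

a = np.array([[1.0, 0.5], [0.5, 1.0]])
b = np.array([[1.0, 0.2], [0.2, 1.0]])
print(aggregate_sim([a, b]))  # [[1.0, 0.1], [0.1, 1.0]]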
Example #23
def main():
	init_time = int(time())
	parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter,usage=splash.replace("       ","",1)+__useage___,add_help=False)
	inArgs,genoArgs,optArgs = parser.add_argument_group('input arguments'),parser.add_argument_group('genotype arguments'),parser.add_argument_group('optional arguments')
	inArgs.add_argument('-i','-bam',type=str,default=None,nargs='*')
	inArgs.add_argument('-b','-bed',type=str,default=None,nargs='*')
	inArgs.add_argument('-v','-vcf',type=str,default=None,nargs='*')
	inArgs.add_argument('-snv',type=str,default=None,nargs='*')
	inArgs.add_argument('-p','-ped',type=str,default=None,nargs='*')
	genoArgs.add_argument('-g','-genome',required=False,default='hg19',type=str)
	genoArgs.add_argument('-pcrfree',required=False,default=False,action="store_true")
	genoArgs.add_argument('-M',default=False,required=False,action="store_true")
	genoArgs.add_argument('-pre',required=False,default=None)
	genoArgs.add_argument('-feats',required=False,default=None)
	optArgs.add_argument('-L','-log',default=None,required=False)
	optArgs.add_argument('-T','-tmp-dir',default=os.getcwd()+'/sv2_tmp_'+rand_id(),required=False)
	optArgs.add_argument('-s','-seed',required=False,default=42,type=int)
	optArgs.add_argument('-o','-out',required=False,default="sv2_training_features",type=str)
	optArgs.add_argument('-O','-odir',required=False,default=os.getcwd(),type=str)
	optArgs.add_argument('-h','-help',required=False,action="store_true",default=False)
	args = parser.parse_args()
	bams,bed,vcf,snv,ped = args.i,args.b,args.v,args.snv,args.p
	gen,pcrfree,legacy_m,predir,featsdir= args.g,args.pcrfree,args.M,args.pre,args.feats
	logfh, tmp_dir, seed, ofh, odir = args.L,args.T,args.s,args.o,args.O
	_help = args.h
	if (_help==True or len(sys.argv)==1):
		print splash+__useage___
		sys.exit(0)
	if logfh!=None:
		lfh = open(logfh,'w')
		sys.stderr=lfh
	preprocess_files,feats_files={},{}
	gens = ['hg19','hg38','mm10']
	olog = logfh
	if olog == None: olog = 'STDOUT'
	print 'sv2 version:{}    report bugs to <dantaki at ucsd dot edu>       error messages located in {}'.format(__version__,olog)
	Confs=Config()
	if bams==None and predir==None and featsdir==None:
		print 'FATAL ERROR: No BAM file specified <-i, -bam  FILE ...>'
		sys.stderr.write('FATAL ERROR: No BAM file specified <-i, -bam  FILE ...>\n')
		sys.exit(1)
	if snv==None and predir==None and featsdir==None:
		print 'FATAL ERROR: No SNV VCF file specified <-snv  FILE ...>'
		sys.stderr.write('FATAL ERROR: No SNV VCF file specified <-snv  FILE ...>\n')
		sys.exit(1)
	if ped==None:
		print 'FATAL ERROR: No PED file specified <-p, -ped  FILE ...>'
		sys.stderr.write('FATAL ERROR: No PED file specified <-p, -ped  FILE ...>\n')
		sys.exit(1)
	if bed==None and vcf==None:
		print 'FATAL ERROR: No SVs provided <-b, -bed  BED ...> <-v,-vcf  VCF ...>'
		sys.stderr.write('FATAL ERROR: No SVs provided <-b, -bed  BED ...> <-v,-vcf  VCF ...>\n')
		sys.exit(1)
	if gen not in gens:
		print 'FATAL ERROR -g must be hg19, hg38, or mm10. NOT {}'.format(gen)
		sys.stderr.write('FATAL ERROR -g must be hg19, hg38, or mm10. NOT {}\n'.format(gen))
		sys.exit(1)
	Peds=ped_init(ped)
	if bams!=None: Bams=bam_init(bams,Peds,snv_init(snv),gen)
	SV = sv_init(bed,vcf,gen)
	ofh = ofh.replace('.vcf','').replace('.out','').replace('.txt','')
	make_dir(tmp_dir)
	tmp_dir=slash_check(tmp_dir)
	if not odir.endswith('/'): odir = odir+'/'
	make_dir(odir)
	"""
	PREPROCESSING
	"""
	if predir == None:
		outdir = odir+'sv2_preprocessing/'
		make_dir(outdir)
		for bam in Bams:
			preofh = outdir+bam.id+'_sv2_preprocessing.txt'
			preprocess_files[bam.id]=preofh
			preprocess(bam,preofh,seed,gen,tmp_dir)
	else:
		predir=slash_check(predir)
		for fh in glob(predir+'*sv2_preprocessing.txt'):
			f = open(fh)
			if sum(1 for l in open(fh)) <= 1: continue
			else:
				preids=[]
				for l in f:
					if l.startswith('#'):continue
					preids.append(l.rstrip('\n').split('\t').pop(0))
			f.close()
			for iid in set(preids):
				if iid in Peds.ids : preprocess_files[iid]=fh
	report_time(init_time,'PREPROCESSING COMPLETE')
	"""
	FEATURE EXTRACTION
	"""
	if featsdir == None:
		outdir = odir+'sv2_features/'
		make_dir(outdir)
		for bam in Bams:
			if preprocess_files.get(bam.id) == None:
				sys.stderr.write('WARNING: BAM sample id {} not found in preprocessing files. Skipping ...\n'.format(bam.id))
				continue
			prefh = preprocess_files[bam.id]
			featfh = outdir+bam.id+'_sv2_features.txt'
			feats_files[bam.id]=featfh
			extract_feats(bam,SV.raw,prefh,featfh,gen,pcrfree,legacy_m,Confs,tmp_dir)
	else:
		featsdir=slash_check(featsdir)
		for fh in glob(featsdir+'*sv2_features.txt'):
			f = open(fh)
			if sum(1 for l in open(fh)) <= 1: continue
			else:
				featsid=[]
				for l in f:
					if l.startswith('#'):continue
					featsid.append(l.rstrip('\n').split('\t').pop(5))
			f.close()
			for iid in set(featsid):
				if iid in Peds.ids : feats_files[iid]=fh
	feats=[]
	train_dir = odir+'sv2_training_features/'
	make_dir(train_dir)
	for iid in feats_files:
		with open(feats_files[iid]) as f:
			for l in f: feats.append(tuple(l.rstrip('\n').split('\t')))
	sv2_train_output(feats,Peds,gen,train_dir+ofh)
	shutil.rmtree(tmp_dir)
	if logfh!=None: lfh.close()
	report_time(init_time,'FEATURE EXTRACTION COMPLETE')
Example #24
def train(args):
    # Preprocess the data: generate the vocab and data files
    preprocess(args['cap_path'], args['vocab_path'], args['data_path'])

    if not os.path.exists(args['model_path']):
        os.mkdir(args['model_path'])

    # Image transforms, with data augmentation
    transform = transforms.Compose([
        transforms.Resize((args['resize'], args['resize'])),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)

    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    data_loader = get_loader(args['train_img_path'],
                             Data,
                             vocab,
                             transform,
                             args['batch_size'],
                             shuffle=True,
                             num_workers=args['num_workers'])

    encoder = Encoder(args['embed_size'], args['pooling_kernel']).cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args['learning_rate'])

    total_step = len(data_loader)
    for epoch in range(args['num_epochs']):
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = images.cuda()
            captions = captions.cuda()
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print training info
            if i % args['log_step'] == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args['num_epochs'], i, total_step,
                            loss.item(), np.exp(loss.item())))

            # Save model checkpoints
            if (i + 1) % args['save_step'] == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args['model_path'],
                                 'decoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args['model_path'],
                                 'encoder-{}-{}.ckpt'.format(epoch + 1,
                                                             i + 1)))

        # Also save the model at the end of each epoch
        torch.save(
            decoder.state_dict(),
            os.path.join(args['model_path'],
                         'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
        torch.save(
            encoder.state_dict(),
            os.path.join(args['model_path'],
                         'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
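The targets above come from pack_padded_sequence, which strips padding and flattens the batch time-major. A CPU-only sketch with toy captions (lengths must be in descending order here, matching the sorted batches the loader is assumed to produce):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

captions = torch.tensor([[1, 2, 3, 0],
                         [4, 5, 0, 0]])  # padded batch, batch_first
lengths = [3, 2]                         # true caption lengths
targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]
print(targets)  # tensor([1, 4, 2, 5, 3]) -- padding removed, time-major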
Example #25
def detectPlatesInScene(imgOriginalScene,
                        PreprocessGaussKernel, PreprocessThreshBlockSize, PreprocessThreshweight, PreprocessMorphKernel,
                        PlateWidthPaddingFactor, PlateHeightPaddingFactor,
                        MinPixelWidth, MaxPixelWidth, MinPixelHeight, MaxPixelHeight, MinAspectRatio, MaxAspectRatio, MinPixelArea, MaxPixelArea,
                        MaxDiagSizeMultipleAway, MinNumberOfMatchingChars, MaxNumberOfMatchingChars, MinAngleBetweenChars, MaxAngleBetweenChars,
                        MinChangeInArea, MaxChangeInArea, MinChangeInWidth, MaxChangeInWidth, MinChangeInHeight, MaxChangeInHeight, debugMode):
    """ License Plate Detection in a given input image scene, using geometrical analysis techniques """

    # Pre-processing (CSC --> contrast --> blur --> threshold):
    imgGrayscaleScene, imgThreshScene = preprocess(imgOriginalScene,
                                                   PreprocessGaussKernel,
                                                   PreprocessThreshBlockSize,
                                                   PreprocessThreshweight,
                                                   PreprocessMorphKernel)

    # Find all possible characters in the scene (finds all contours that could be characters, w/o OCR yet):
    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene,
                                                          MinPixelWidth, MaxPixelWidth,
                                                          MinPixelHeight, MaxPixelHeight,
                                                          MinAspectRatio, MaxAspectRatio,
                                                          MinPixelArea, MaxPixelArea,
                                                          debugMode)

    # Given a list of all possible chars, find groups of matching characters (later on, each group will attempt to be recognized as a plate):
    listOfListsOfMatchingCharsInScene = findListOfListsOfMatchingChars(listOfPossibleCharsInScene,
                                                                       MinNumberOfMatchingChars,
                                                                       MaxNumberOfMatchingChars,
                                                                       MinAngleBetweenChars, MaxAngleBetweenChars,
                                                                       MinChangeInArea, MaxChangeInArea,
                                                                       MinChangeInWidth, MaxChangeInWidth,
                                                                       MinChangeInHeight, MaxChangeInHeight,
                                                                       MaxDiagSizeMultipleAway)

    # For each group of matching chars, attempt to extract plate:
    listOfPossiblePlates = []
    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:

        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars,  PlateWidthPaddingFactor, PlateHeightPaddingFactor)

        # Add plate to list of possible plates (if found):
        if possiblePlate.imgPlate is not None:
            listOfPossiblePlates.append(possiblePlate)

    info("%d possible plates found" % len(listOfPossiblePlates))

    # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
    if debugMode:

        height, width, _ = imgOriginalScene.shape

        # Original image:
        imwrite("img_original.jpg", imgOriginalScene)

        # Pre-processing images:
        imwrite("img_gray.jpg", imgGrayscaleScene)
        imwrite("img_threshold.jpg", imgThreshScene)

        # Possible characters in image:
        imgContours = zeros((height, width, 3), uint8)
        contours = []
        for possibleChar in listOfPossibleCharsInScene:
            contours.append(possibleChar.contour)
        drawContours(imgContours, contours, -1, Colors.white)
        imwrite("img_contours_possible_chars.jpg", imgContours)

        # Matching characters:
        imgContours = zeros((height, width, 3), uint8)
        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            intRandomBlue = randint(0, 255)
            intRandomGreen = randint(0, 255)
            intRandomRed = randint(0, 255)
            contours = []
            for matchingChar in listOfMatchingChars:
                contours.append(matchingChar.contour)
            drawContours(imgContours, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))
            imwrite("img_contours_matching_chars.jpg", imgContours)

        # Possible license-plates:
        for i in range(0, len(listOfPossiblePlates)):
            p2fRectPoints = boxPoints(listOfPossiblePlates[i].rrLocationOfPlateInScene)
            line(imgContours, tuple(p2fRectPoints[0]), tuple(p2fRectPoints[1]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[1]), tuple(p2fRectPoints[2]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[2]), tuple(p2fRectPoints[3]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[3]), tuple(p2fRectPoints[0]), Colors.red, 2)
            imwrite("img_contours_possible_plates_%d.jpg" % i, imgContours)
            imwrite("img_plate_%d.jpg" % i, listOfPossiblePlates[i].imgPlate)

        debug("Plate detection complete", True)

    return listOfPossiblePlates
Example #26
    ap = argparse.ArgumentParser(add_help=False)
    ap.add_argument('-c', '--content', required=True)
    ap.add_argument('-s', '--style', required=True)
    ap.add_argument('-a', '--alpha', default=1e-3)
    ap.add_argument('-b', '--beta', default=1.0)
    ap.add_argument('-e', '--steps', default=300)
    ap.add_argument('-h', '--img_h', default=512)
    ap.add_argument('-w', '--img_w', default=512)
    ap.add_argument('-o', '--output', default='./outputs/')
    ap.add_argument('-d', '--display', default=False)
    ap.add_argument('-n', '--name', required=True)

    args = vars(ap.parse_args())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    content, style = preprocess(args['content'], args['style'], args['img_h'],
                                args['img_w'])

    content = content.to(device)
    style = style.to(device)
    neural_style = Neural_Style(content, style)
    neural_style.to(device)

    steps = int(args['steps'])
    LBFGS = torch.optim.LBFGS([neural_style.target])

    alpha = float(args['alpha'])
    beta = float(args['beta'])

    i = 0
    while i <= steps:
Example #27
    def reset_one(self, i):
        reset_state = self.env[i].reset()
        reset_state = preprocess(reset_state)
        reset_state = np.array(np.concatenate([reset_state for k in range(self.group_size)], axis=-1))
        return reset_state
Example #28
for image_name in files:
    start_time = time.time()
    _, image = os.path.split(image_name)

    image_name_base = os.path.splitext(image)[0]
    output_image_directory = os.path.abspath(
        os.path.join(output_directory, image_name_base))

    remove_directory(output_image_directory)
    ensure_directory(output_image_directory)

    # Preprocess image
    print("")
    print("Processing " + image)
    preprocessed_image = preprocess(image_name,
                                    output_image_directory,
                                    runmode=runmode)
    print("Finished preprocessing " + image)

    print("    ****    ")

    # Segment preprocessed image
    print("Segmenting " + image)
    words_li_li = segment(preprocessed_image,
                          output_image_directory,
                          runmode=runmode)
    print("Finished segmenting " + image)

    print("    ****    ")

    # Classify segmented image
Example #29
def detectCharsInPlates(listOfPossiblePlates, PreprocessGaussKernel, PreprocessThreshBlockSize, PreprocessThreshweight,
                        PreprocessMorphKernel, MinPixelWidth, MaxPixelWidth, MinPixelHeight, MaxPixelHeight,
                        MinAspectRatio, MaxAspectRatio, MinPixelArea, MaxPixelArea, MinDiagSizeMultipleAway, MaxDiagSizeMultipleAway,
                        MinNumberOfMatchingChars, MaxNumberOfMatchingChars, MinAngleBetweenChars, MaxAngleBetweenChars,
                        MinChangeInArea, MaxChangeInArea, MinChangeInWidth, MaxChangeInWidth, MinChangeInHeight,
                        MaxChangeInHeight, ResizedCharImageWidth, ResizedCharImageHeight, kNearest, DebugMode):
    """ Detect characters in the pre-detected plate (OCR analysis, over KNN engine) """

    # Early break condition (empty input):
    if len(listOfPossiblePlates) == 0:
        return listOfPossiblePlates

    # For each possible plate --> preprocess, find all characters, try to group them, remove overlaps and perform OCR:
    intPlateCounter = 0
    longestListOfMatchingCharsInPlate = []
    for possiblePlate in listOfPossiblePlates:

        # Pre-processing (CSC --> contrast --> blur --> threshold):
        possiblePlate.imgGrayscale, imgThreshScene = preprocess(possiblePlate.imgPlate,
                                                                PreprocessGaussKernel,
                                                                PreprocessThreshBlockSize,
                                                                PreprocessThreshweight,
                                                                PreprocessMorphKernel)

        # Increase size of plate image for easier viewing and char detection
        possiblePlate.imgThresh = resize(imgThreshScene, (0, 0), fx=1.6, fy=1.6)

        # Threshold again to eliminate any gray areas:
        _, possiblePlate.imgThresh = threshold(possiblePlate.imgThresh, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU)

        # Find all possible chars in the plate (finds all contours that could be chars):
        listOfPossibleCharsInPlate = findPossibleCharsInPlate(possiblePlate.imgThresh,
                                                              MinPixelWidth, MaxPixelWidth,
                                                              MinPixelHeight, MaxPixelHeight,
                                                              MinAspectRatio, MaxAspectRatio,
                                                              MinPixelArea, MaxPixelArea)

        # Given a list of all possible chars, find groups of matching chars within the plate:
        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(listOfPossibleCharsInPlate,
                                                                           MinNumberOfMatchingChars, MaxNumberOfMatchingChars,
                                                                           MinAngleBetweenChars, MaxAngleBetweenChars,
                                                                           MinChangeInArea, MaxChangeInArea,
                                                                           MinChangeInWidth, MaxChangeInWidth,
                                                                           MinChangeInHeight, MaxChangeInHeight,
                                                                           MaxDiagSizeMultipleAway)

        # If groups of matching chars were found in the plate:
        if len(listOfListsOfMatchingCharsInPlate) > 0:

            # Within each list of matching chars, sort chars from left to right and remove inner overlapping chars:
            for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
                listOfListsOfMatchingCharsInPlate[i].sort(key=lambda tmpMatchingChar: tmpMatchingChar.intCenterX)
                listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(listOfListsOfMatchingCharsInPlate[i],
                                                                                   MinDiagSizeMultipleAway)

            # Within each possible plate, loop through all the vectors of matching chars, get the index of the one with the most chars:
            intLenOfLongestListOfChars = 0
            intIndexOfLongestListOfChars = 0
            for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
                if len(listOfListsOfMatchingCharsInPlate[i]) > intLenOfLongestListOfChars:
                    intLenOfLongestListOfChars = len(listOfListsOfMatchingCharsInPlate[i])
                    intIndexOfLongestListOfChars = i

            # Suppose that the longest list of matching chars within the plate is the actual list of chars:
            longestListOfMatchingCharsInPlate = listOfListsOfMatchingCharsInPlate[intIndexOfLongestListOfChars]

            # Characters recognition (OCR):
            possiblePlate.strChars = recognizeCharsInPlate(possiblePlate.imgThresh,
                                                           longestListOfMatchingCharsInPlate,
                                                           ResizedCharImageWidth,
                                                           ResizedCharImageHeight,
                                                           kNearest,
                                                           intPlateCounter,
                                                           DebugMode)

        # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
        if DebugMode:

            height, width, _ = possiblePlate.imgPlate.shape
            contours1 = []; imgContours1 = zeros((height, width, 3), uint8)
            contours2 = []; imgContours2 = zeros((height, width, 3), uint8)
            contours3 = []; imgContours3 = zeros((height, width, 3), uint8)
            contours4 = []; imgContours4 = zeros((height, width, 3), uint8)

            for possibleChar in listOfPossibleCharsInPlate:
                contours1.append(possibleChar.contour)
            drawContours(imgContours1, contours1, -1, Colors.white)

            if len(listOfListsOfMatchingCharsInPlate) > 0:

                for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                    intRandomBlue = randint(0, 255)
                    intRandomGreen = randint(0, 255)
                    intRandomRed = randint(0, 255)
                    for matchingChar in listOfMatchingChars:
                        contours2.append(matchingChar.contour)
                    drawContours(imgContours2, contours2, -1, (intRandomBlue, intRandomGreen, intRandomRed))

                for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                    intRandomBlue = randint(0, 255)
                    intRandomGreen = randint(0, 255)
                    intRandomRed = randint(0, 255)
                    for matchingChar in listOfMatchingChars:
                        contours3.append(matchingChar.contour)
                    drawContours(imgContours3, contours3, -1, (intRandomBlue, intRandomGreen, intRandomRed))

                for matchingChar in longestListOfMatchingCharsInPlate:
                    contours4.append(matchingChar.contour)
                drawContours(imgContours4, contours4, -1, Colors.white)

            imwrite("img_possible_plate_%d.jpg" % intPlateCounter, possiblePlate.imgPlate)
            imwrite("img_possible_plate_gray_%d.jpg" % intPlateCounter, possiblePlate.imgGrayscale)
            imwrite("img_possible_plate_threshold_scene_%d.jpg" % intPlateCounter, imgThreshScene)
            imwrite("img_possible_plate_threshold_%d.jpg" % intPlateCounter, possiblePlate.imgThresh)
            imwrite("img_possible_plate_contours1_%d.jpg" % intPlateCounter, imgContours1)
            if len(listOfListsOfMatchingCharsInPlate) > 0:
                imwrite("img_possible_plate_contours2_%d.jpg" % intPlateCounter, imgContours2)
                imwrite("img_possible_plate_contours3_%d.jpg" % intPlateCounter, imgContours3)
                imwrite("img_possible_plate_contours4_%d.jpg" % intPlateCounter, imgContours4)

            if len(listOfListsOfMatchingCharsInPlate) > 0:
                debug("Characters found in plate number #%d = %s" % (intPlateCounter, possiblePlate.strChars), True)
                intPlateCounter = intPlateCounter + 1
            else:
                debug("Characters found in plate number #%d = (none)" % intPlateCounter, True)
                intPlateCounter = intPlateCounter + 1

        # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
        # If no groups of matching chars were found in the plate, continue for next plate candidate:
        if len(listOfListsOfMatchingCharsInPlate) == 0:

            possiblePlate.strChars = ""
            continue

    if DebugMode:
        debug("Characters detection complete", True)

    return listOfPossiblePlates
Example #30
    def preprocess(self):
        """ Preprocessing phase to execute boundary finding, reordering and
        equation calculation sequentially. """
        self._preprocessRan = True
        self._node_reorder2, self._reorder_E, self._L_inv, self._U_inv, self._L_k_inv, self._U_k_inv, self._boundary_start_number, self._index_start, self._T1, self._T2 = preprocess(
            self._partition_list, self._nparts, self._n, self._E)