def _decode_sentence(model, corpus, sent):
    """Decode one sentence and render its gold and predicted orderings.

    Returns ``(s_gold, s_pred, order_gold, order_pred)``; the two strings are
    the UTF-8 encoded, space-joined words of ``sent`` after ``utils.reorder``
    has been applied with the gold / predicted order respectively.
    """
    batch_sents = [sent]
    batch_labels = make_labels(batch_sents)
    _, order_pred = model.forward(batch_sents, train=False)
    order_pred = [a[0] for a in order_pred]
    order_gold = batch_labels[0]
    words = [corpus.ivocab[w] for w in sent]
    s_pred = " ".join(utils.reorder(words, order_pred)).encode("utf-8")
    s_gold = " ".join(utils.reorder(words, order_gold)).encode("utf-8")
    return s_gold, s_pred, order_gold, order_pred


def _log_validation_samples(model, corpus_val, n_samples=10):
    """Log gold vs. predicted orderings for ``n_samples`` random validation sentences."""
    # The drawn integers themselves are unused (sampling is delegated to
    # corpus_val.random_sample()); the draw is kept so the NumPy RNG state
    # advances exactly as it did before this helper was extracted.
    for _ in np.random.randint(0, len(corpus_val), n_samples):
        s_gold, s_pred, order_gold, order_pred = _decode_sentence(
            model, corpus_val, corpus_val.random_sample())
        utils.logger.debug("[check] <Gold> %s" % s_gold)
        utils.logger.debug("[check] <Pred> %s" % s_pred)
        utils.logger.debug("[check] <Gold:order> %s" % order_gold)
        utils.logger.debug("[check] <Pred:order> %s" % order_pred)


def main(args):
    """Train, evaluate or analyse the WON word-ordering model.

    ``args`` must provide ``gpu``, ``config``, ``mode``, ``word2vec`` and
    ``curriculum``.  ``mode`` selects the branch:

    * ``"train"``      -- train with early stopping on validation accuracy,
      snapshotting the model, optimizer and word vectors on every improvement.
    * ``"evaluation"`` -- currently a no-op after the snapshot is loaded.
    * ``"analysis"``   -- decode every validation sentence and dump gold and
      predicted orderings to ``<analysis dir>/<basename>/dump.txt``.
    """
    gpu = args.gpu
    path_config = args.config
    mode = args.mode
    path_word2vec = args.word2vec
    curriculum = args.curriculum != 0

    # Hyper parameters (const)
    MAX_EPOCH = 10000000000
    MAX_PATIENCE = 20
    EVAL = 10000  # validate every EVAL iterations
    if curriculum:
        LENGTH_LIMITS = [10, 20, 30, 40, 50]  # NOTE: experimental
    else:
        LENGTH_LIMITS = [50]

    config = utils.Config(path_config)

    # Preparation
    path_corpus_train = config.getpath("prep_corpus") + ".train"
    path_corpus_val = config.getpath("prep_corpus") + ".val"
    basename = "won.%s.%s" % (
        os.path.basename(path_corpus_train),
        os.path.splitext(os.path.basename(path_config))[0])
    path_snapshot = os.path.join(config.getpath("snapshot"), basename + ".model")
    path_snapshot_vectors = os.path.join(config.getpath("snapshot"), basename + ".vectors.txt")
    if mode == "train":
        path_log = os.path.join(config.getpath("log"), basename + ".log")
        utils.set_logger(path_log)
    elif mode == "evaluation":
        path_evaluation = os.path.join(config.getpath("evaluation"), basename + ".txt")
        utils.set_logger(path_evaluation)
    elif mode == "analysis":
        path_analysis = os.path.join(config.getpath("analysis"), basename)

    utils.logger.debug("[info] TRAINING CORPUS: %s" % path_corpus_train)
    utils.logger.debug("[info] VALIDATION CORPUS: %s" % path_corpus_val)
    utils.logger.debug("[info] CONFIG: %s" % path_config)
    utils.logger.debug("[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec)
    utils.logger.debug("[info] SNAPSHOT (MODEL): %s " % path_snapshot)
    utils.logger.debug("[info] SNAPSHOT (WORD EMBEDDINGS): %s " % path_snapshot_vectors)
    if mode == "train":
        utils.logger.debug("[info] LOG: %s" % path_log)
    elif mode == "evaluation":
        utils.logger.debug("[info] EVALUATION: %s" % path_evaluation)
    elif mode == "analysis":
        utils.logger.debug("[info] ANALYSIS: %s" % path_analysis)

    # Hyper parameters
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    aggregation = config.getstr("aggregation")
    attention = config.getstr("attention")
    retrofitting = config.getbool("retrofitting")
    alpha = config.getfloat("alpha")
    scale = config.getfloat("scale")
    identity_penalty = config.getbool("identity_penalty")
    lmd = config.getfloat("lambda")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")
    utils.logger.debug("[info] WORD DIM: %d" % word_dim)
    utils.logger.debug("[info] STATE DIM: %d" % state_dim)
    utils.logger.debug("[info] AGGREGATION METHOD: %s" % aggregation)
    utils.logger.debug("[info] ATTENTION METHOD: %s" % attention)
    utils.logger.debug("[info] RETROFITTING: %s" % retrofitting)
    utils.logger.debug("[info] ALPHA = %f" % alpha)
    utils.logger.debug("[info] SCALE: %f" % scale)
    utils.logger.debug("[info] IDENTITY PENALTY: %s" % identity_penalty)
    utils.logger.debug("[info] LAMBDA: %f" % lmd)
    utils.logger.debug("[info] GRADIENT CLIPPING: %f" % grad_clip)
    utils.logger.debug("[info] WEIGHT DECAY: %f" % weight_decay)
    utils.logger.debug("[info] BATCH SIZE: %d" % batch_size)
    if retrofitting:
        # The retrofitting penalty pulls embeddings towards the pre-trained
        # vectors, so those vectors must have been supplied.
        assert path_word2vec is not None

    # Data preparation: one training corpus per curriculum length limit
    corpus_train_list = [
        load_corpus(
            path_corpus_train,
            vocab=path_corpus_train + ".vocab",
            max_length=length_limit)
        for length_limit in LENGTH_LIMITS]
    corpus_val = load_corpus(
        path_corpus_val,
        vocab=corpus_train_list[0].vocab,
        max_length=LENGTH_LIMITS[-1])

    # Model preparation
    if (mode == "train") and (path_word2vec is not None):
        initialW_data = utils.load_word2vec_weight_matrix(
            path_word2vec,
            word_dim,
            corpus_train_list[0].vocab,
            scale)
    else:
        initialW_data = None
    cuda.get_device(gpu).use()
    model = models.WON(
        vocab_size=len(corpus_train_list[0].vocab),
        word_dim=word_dim,
        state_dim=state_dim,
        aggregation=aggregation,
        attention=attention,
        initialW=initialW_data,
        EOS_ID=corpus_train_list[0].vocab["<EOS>"])
    if mode != "train":
        serializers.load_npz(path_snapshot, model)
    model.to_gpu(gpu)

    # Training/Evaluation/Analysis
    if mode == "train":
        length_index = 0
        utils.logger.debug("[info] Evaluating on the validation set ...")
        loss, acc = evaluate(model, corpus_val, lmd, identity_penalty)
        utils.logger.debug("[validation] iter=0, epoch=0, max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                (LENGTH_LIMITS[length_index], loss, acc*100))
        _log_validation_samples(model, corpus_val)

        # training & validation
        opt = optimizers.SMORMS3()
        opt.setup(model)
        opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
        opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))
        # The untrained model's validation accuracy is the baseline to beat.
        best_acc = acc
        patience = 0
        it = 0
        n_train = len(corpus_train_list[0])  # TODO
        finish_training = False
        for epoch in xrange(1, MAX_EPOCH+1):
            if finish_training:
                break
            for data_i in xrange(0, n_train, batch_size):
                # Drop the final incomplete mini-batch of the epoch.
                if data_i + batch_size > n_train:
                    break

                # data preparation
                batch_sents = corpus_train_list[length_index].next_batch(size=batch_size)
                batch_labels = make_labels(batch_sents)

                # forward
                loss, acc = forward(model, batch_sents, batch_labels, lmd, identity_penalty, train=True)

                # TODO: BEGIN
                if retrofitting:
                    # Penalise drift from the pre-trained vectors, restricted
                    # to the word ids that occur in this mini-batch.
                    part_indices_data = np.asarray(list(
                        set([w for s_ in batch_sents for w in s_])
                        ))
                    part_initialW_data = initialW_data[part_indices_data]
                    part_indices = Variable(cuda.cupy.asarray(part_indices_data, dtype=np.int32), volatile=False)
                    part_initialW = Variable(cuda.cupy.asarray(part_initialW_data, dtype=np.float32), volatile=False)
                    loss_ret = frobenius_squared_error(model.embed(part_indices), part_initialW)
                else:
                    loss_ret = 0.0
                loss = loss + alpha * loss_ret
                # TODO: END

                # backward & update
                model.zerograds()
                loss.backward()
                loss.unchain_backward()
                opt.update()
                it += 1

                # log
                loss = float(cuda.to_cpu(loss.data))
                acc = float(cuda.to_cpu(acc.data))
                utils.logger.debug("[training] iter=%d, epoch=%d (%d/%d=%.03f%%), max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                        (it, epoch, data_i+batch_size, n_train,
                         float(data_i+batch_size)/n_train * 100,
                         LENGTH_LIMITS[length_index],
                         loss, acc*100))

                if it % EVAL == 0:
                    # validation
                    utils.logger.debug("[info] Evaluating on the validation set ...")
                    loss, acc = evaluate(model, corpus_val, lmd, identity_penalty)
                    utils.logger.debug("[validation] iter=%d, epoch=%d, max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                            (it, epoch, LENGTH_LIMITS[length_index], loss, acc*100))
                    _log_validation_samples(model, corpus_val)

                    if best_acc < acc:
                        # save
                        utils.logger.debug("[info] Best accuracy is updated: %.2f%% => %.2f%%" % (best_acc*100.0, acc*100.0))
                        best_acc = acc
                        patience = 0
                        serializers.save_npz(path_snapshot, model)
                        serializers.save_npz(path_snapshot + ".opt", opt)
                        save_word2vec(path_snapshot_vectors, extract_word2vec(model, corpus_train_list[length_index].vocab))
                        utils.logger.debug("[info] Saved.")
                    else:
                        patience += 1
                        utils.logger.debug("[info] Patience: %d (best accuracy: %.2f%%)" % (patience, best_acc*100.0))
                        if patience >= MAX_PATIENCE:
                            if curriculum and (length_index != len(LENGTH_LIMITS)-1):
                                # Move on to the next (longer) length limit and
                                # start a fresh epoch on that corpus.
                                # NOTE(review): patience is not reset here, so a
                                # later stage advances again after a single
                                # non-improving validation -- confirm intended.
                                length_index += 1
                                break
                            else:
                                utils.logger.debug("[info] Patience %d is over. Training finished." \
                                        % patience)
                                finish_training = True
                                break

    elif mode == "evaluation":
        pass

    elif mode == "analysis":
        utils.mkdir(path_analysis)
        # The context manager guarantees the dump file is flushed and closed
        # even if decoding raises part-way through the corpus.
        with open(os.path.join(path_analysis, "dump.txt"), "w") as f:
            for sent_no, s in enumerate(pyprind.prog_bar(corpus_val), 1):
                # NOTE: in analysis mode every sentence is decoded,
                # regardless of its length.
                s_gold, s_pred, order_gold, order_pred = _decode_sentence(model, corpus_val, s)
                f.write("[%d] <Gold> %s\n" % (sent_no, s_gold))
                f.write("[%d] <Pred> %s\n" % (sent_no, s_pred))
                f.write("[%d] <Gold:order> %s\n" % (sent_no, order_gold))
                f.write("[%d] <Pred:order> %s\n" % (sent_no, order_pred))

    utils.logger.debug("[info] Done.")
contours, hierarchy = cv2.findContours( imgErode, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) # FINDING CONTOURS contours = sorted( contours, key=cv2.contourArea, reverse=True) # SORTING CONTOURS TO GET THE BIGGEST CONTOUR for c in contours: p = cv2.arcLength(c, True) # FINDING ARC LENGTH approx = cv2.approxPolyDP(c, 0.02 * p, True) if len(approx) == 4: # LENGTH IS 4 FOR RECTANGLE target = approx break approx = utils.reorder(target) # REORDERING THE POINTS #cv2.drawContours(img,target,-1,(0,255,0),20) # TO DISPLAY CONTOURS print(approx) # DISPLAYS THE POINTS pts1 = np.float32(approx) pts2 = np.float32([[0, 0], [widthImg, 0], [0, heightImg], [widthImg, heightImg]]) matrix = cv2.getPerspectiveTransform(pts1, pts2) imgWarpColored = cv2.warpPerspective( img, matrix, (widthImg, heightImg)) # WARPS THE IMAGE cv2.imshow('Original', img) cv2.imshow('Erode', imgErode) cv2.imshow('WarpCol', imgWarpColored) if cv2.waitKey(1) & 0xFF == ord('q'):
# Detect all external contours on the edge map and draw them for inspection.
contours, hierarchy = cv2.findContours(imgCanny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cv2.drawContours(imgContours, contours, -1, (255, 0, 255), 7)

# ================= Find Rectangles ===================
rectCont = utils.rectContour(contours)
# NOTE(review): indexing assumes at least two rectangular contours were found;
# fewer would raise IndexError here -- confirm the caller guards against that.
biggestContour = utils.getCornerPoints(rectCont[0])  # first biggest area
gradePoints = utils.getCornerPoints(rectCont[1])     # second biggest, used for grading

# Both corner sets must be non-empty.  The original tested
# len(gradePoints!=0), i.e. the length of an element-wise comparison result,
# because of a misplaced parenthesis.
if len(biggestContour) != 0 and len(gradePoints) != 0:
    cv2.drawContours(imgBiggestContours, biggestContour, -1, (0, 0, 255), 15)
    cv2.drawContours(imgBiggestContours, gradePoints, -1, (255, 0, 0), 15)

    # Reorder points for the bird's-eye view
    biggestContour = utils.reorder(biggestContour)
    gradePoints = utils.reorder(gradePoints)

    # ============ Bird View for Biggest Rectangle (OMR) ============
    pt1 = np.float32(biggestContour)
    # Destination corners: top-left, top-right, bottom-left, bottom-right.
    pt2 = np.float32([[0, 0], [widthImg, 0], [0, heightImg], [widthImg, heightImg]])
    matrix = cv2.getPerspectiveTransform(pt1, pt2)
    imgWarpColored = cv2.warpPerspective(img, matrix, (widthImg, heightImg))

    # ============ Bird View for Second Biggest Rectangle (Grading) ============
    ptsG1 = np.float32(gradePoints)
    ptsG2 = np.float32([[0, 0], [325, 0], [0, 150], [325, 150]])
    matrixG = cv2.getPerspectiveTransform(ptsG1, ptsG2)
    imgGradeDisplay = cv2.warpPerspective(img, matrixG, (325, 150))
    #cv2.imshow("grade",imgGradeDisplay)
else: img = cv2.imread(path) imgContours, conts = utils.getContours(img, minArea=50000, filter=4) if len(conts) != 0: biggest = conts[0][2] #print(biggest) imgWarp = utils.warpImg(img, biggest, wP, hP) imgContours2, conts2 = utils.getContours(imgWarp, minArea=2000, filter=4, cThr=[50, 50], draw=False) if len(conts) != 0: for obj in conts2: cv2.polylines(imgContours2, [obj[2]], True, (0, 255, 0), 2) nPoints = utils.reorder(obj[2]) nW = round((utils.findDis(nPoints[0][0] // scale, nPoints[1][0] // scale) / 10), 1) nH = round((utils.findDis(nPoints[0][0] // scale, nPoints[2][0] // scale) / 10), 1) cv2.arrowedLine(imgContours2, (nPoints[0][0][0], nPoints[0][0][1]), (nPoints[1][0][0], nPoints[1][0][1]), (255, 0, 255), 3, 8, 0, 0.05) cv2.arrowedLine(imgContours2, (nPoints[0][0][0], nPoints[0][0][1]), (nPoints[2][0][0], nPoints[2][0][1]), (255, 0, 255), 3, 8, 0, 0.05) x, y, w, h = obj[3] cv2.putText(imgContours2, '{}cm'.format(nW), (x + 30, y - 10), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (255, 0, 255),
def test_reorder():
    """reorder() must emit, at each position, the item selected by the index list."""
    shuffled = ['b', 'c', 'a']
    picks = [2, 0, 1]
    expected = ['a', 'b', 'c']
    assert utils.reorder(shuffled, picks) == expected
t1, t2 = utils.get_val() # print(t1,t2) #Load Image img = cv2.imread(path) # print(img.shape) #Perform edge detection,dilation and erosion pp_img, orig_copy = utils.preprocess(img, t1, t2) imgContours = orig_copy.copy() #Copying image for display purposes #Finding contours from the binary image contours = utils.get_contours(pp_img) biggest_contours = utils.getBigCntr(contours) cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 2) reorderd_points = utils.reorder(biggest_contours) # print("reordered points : ",len(reorderd_points),type(reorderd_points)) #Apply perspective transform on the resized image final = utils.get_perspective(reorderd_points, orig_copy) #display the images cv2.imshow("Image", orig_copy) cv2.imshow("Preprocessed", pp_img) cv2.imshow("contours", imgContours) cv2.imshow("Final", final) if cv2.waitKey(1) == 27: cv2.imwrite("output/resized.jpeg", orig_copy) cv2.imwrite("output/preprocessed.jpeg", pp_img) cv2.imwrite("output/contours.jpeg", imgContours)
imgThreshold = cv2.Canny(imgBlur, thres[0], thres[1]) # APPLY CANNY BLUR kernel = np.ones((5, 5)) imgDial = cv2.dilate(imgThreshold, kernel, iterations=2) # APPLY DILATION imgThreshold = cv2.erode(imgDial, kernel, iterations=1) # APPLY EROSION ## FIND ALL COUNTOURS imgContours = img.copy() # COPY IMAGE FOR DISPLAY PURPOSES imgBigContour = img.copy() # COPY IMAGE FOR DISPLAY PURPOSES contours, hierarchy = cv2.findContours(imgThreshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # FIND ALL CONTOURS cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 10) # DRAW ALL DETECTED CONTOURS # FIND THE BIGGEST COUNTOUR biggest, maxArea = utils.biggestContour(contours) # FIND THE BIGGEST CONTOUR if biggest.size != 0: biggest = utils.reorder(biggest) cv2.drawContours(imgBigContour, biggest, -1, (0, 255, 0), 20) # DRAW THE BIGGEST CONTOUR imgBigContour = utils.drawRectangle(imgBigContour, biggest, 2) pts1 = np.float32(biggest) # PREPARE POINTS FOR WARP pts2 = np.float32([[0, 0], [widthImg, 0], [0, heightImg], [widthImg, heightImg]]) # PREPARE POINTS FOR WARP matrix = cv2.getPerspectiveTransform(pts1, pts2) imgWarpColored = cv2.warpPerspective(img, matrix, (widthImg, heightImg)) # REMOVE 20 PIXELS FORM EACH SIDE imgWarpColored = imgWarpColored[20:imgWarpColored.shape[0] - 20, 20:imgWarpColored.shape[1] - 20] imgWarpColored = cv2.resize(imgWarpColored, (widthImg, heightImg)) # APPLY ADAPTIVE THRESHOLD imgWarpGray = cv2.cvtColor(imgWarpColored, cv2.COLOR_BGR2GRAY) imgAdaptiveThre = cv2.adaptiveThreshold(imgWarpGray, 255, 1, 1, 7, 2) imgAdaptiveThre = cv2.bitwise_not(imgAdaptiveThre)
def update_graph(self, element_list, rank):
    """Add ranking information to the graph.

    ``element_list`` is put in ranked order via ``utils.reorder(element_list,
    rank)``; an edge is then added from every element to its immediate
    successor in that order.  Sequences with fewer than two elements add no
    edges.
    """
    ordered_elements = utils.reorder(element_list, rank)
    # Stop one short of the end: the last element has no successor.
    for i in range(len(ordered_elements) - 1):
        self.add_edge(superior_element=ordered_elements[i],
                      inferior_element=ordered_elements[i + 1])
def ordering(self):
    '''Change the order of all children in a parent's collection.

    Reads ``programId``, ``surveyId``, ``parentId`` and ``root`` from the
    request arguments and the new order from the JSON request body.  Either a
    parent question node (``parentId``) or the survey root (``root=``) is
    reordered, never both.

    Raises ``errors.ModelError`` for an invalid argument combination and
    ``errors.MissingDocError`` when the parent node or survey cannot be
    found (or the parent belongs to another survey).
    '''
    program_id = self.get_argument('programId', '')
    survey_id = self.get_argument('surveyId', '')
    parent_id = self.get_argument('parentId', '')
    root = self.get_argument('root', None)

    # parent_id defaults to '' and is therefore never None; test for
    # emptiness so this validation can actually fire.
    if not parent_id and root is None:
        raise errors.ModelError(
            "Parent ID required, or specify 'root=' for root nodes")
    if root is not None and parent_id:
        raise errors.ModelError(
            "Can't specify both 'root=' and parent ID")
    if not survey_id:
        raise errors.ModelError(
            "Survey ID is required for operating on root nodes")

    son = json_decode(self.request.body)

    with model.session_scope() as session:
        user_session = self.get_user_session(session)
        act = Activities(session)

        if parent_id:
            parent = (
                session.query(model.QuestionNode)
                .get((parent_id, program_id)))
            if not parent:
                raise errors.MissingDocError(
                    "Parent question node does not exist")
            survey = parent.survey
            if survey_id and survey_id != str(survey.id):
                raise errors.MissingDocError(
                    "Parent does not belong to that survey")
            log.debug("Reordering children of: %s", parent)
            reorder(parent.children, son)
            act.record(user_session.user, parent, ['reorder_children'])
            act.ensure_subscription(
                user_session.user, parent, parent.program, self.reason)

        elif root is not None:
            survey = (
                session.query(model.Survey)
                .get((survey_id, program_id)))
            if not survey:
                raise errors.MissingDocError("No such survey")
            log.debug("Reordering children of: %s", survey)
            reorder(survey.qnodes, son)
            act.record(
                user_session.user, survey, ['reorder_children'])
            act.ensure_subscription(
                user_session.user, survey, survey.program, self.reason)

        else:
            # Defensive: unreachable given the argument checks above.
            raise errors.ModelError(
                "Survey or parent ID required")

        # NOTE(review): authorisation is verified after the reorder has been
        # applied to the session; this presumably relies on session_scope
        # rolling back when verify() raises -- confirm.
        policy = user_session.policy.derive({
            'program': survey.program,
            'survey': survey,
            'surveygroups': survey.program.surveygroups,
        })
        policy.verify('surveygroup_interact')
        policy.verify('qnode_edit')

    self.query()
rna_id_temp = subset_sample_ids_rna.tolist() meth_id_temp = subset_sample_ids_meth.tolist() # I think this explanation is wrong lol, code works beautifully # after for-loop, ordered_index will contain the indexes that methylation should change to in order to match rna # ex. ordered_index = [5,2,6,1,3,4], then in order for methylation to be in the same order as rna, we need to create # a new list that looks like: [meth[5],meth[2],meth[6],meth[1],meth[3],meth[4]] for rna_id in rna_id_temp: idx = meth_id_temp.index(rna_id) ordered_index.append(idx) reorder(subset_sample_ids_rna, ordered_index) reorder(data_rna, ordered_index) reorder(T_rna, ordered_index) reorder(subset_tissue_rna, ordered_index) # create joint data data = np.concatenate((data_rna, data_meth), 1) T = T_rna model = DPGMM(data=data, tissue_assignments=T,
def doFitsClassic(ws, mhypVar, recoMassVar, cat, proc, allMasses, massScaleNuisance, resolutionNuisance):
    """Classic fitting of signal MC, followed by interpolation in the mass hypothesis.

    For every mass point in ``allMasses`` the per-mass signal pdf found in the
    workspace ``ws`` is fitted to the matching signal MC dataset, its Gaussian
    components are sorted by width, and the fitted parameters are frozen.
    Piecewise-linear functions of ``mhypVar`` are then built through the
    fitted values, combined into one sum-of-Gaussians pdf named
    ``sigpdf_<proc>_<cat>`` plus a ``..._norm`` function, and imported into
    ``ws``.

    ``cat`` and ``proc`` are the category and process labels used to build
    object names; ``recoMassVar`` is the reconstructed-mass variable;
    ``massScaleNuisance`` and ``resolutionNuisance`` are forwarded unchanged
    to ``utils.makeSumOfGaussians``.

    Relies on the module-level ``fitparams`` and ``gcs``.  Nothing is
    returned; results live in ``ws``.
    """

    # fitted values for this category and signal process
    # first index is the Gaussian component number
    # second index is the mass point index
    sigmaValues = []
    dmuValues = []
    fracValues = []
    normValues = []

    for mass in allMasses:
        # get the signal MC dataset
        # e.g. sig_Hem_unbinned_ggh_115_cat7
        dataset = utils.getObj(ws, "sig_Hem_unbinned_%s_%d_%s" % (proc, mass, cat))

        # get the signal pdf
        # e.g. sigpdf_vbf_115_cat8
        pdf = utils.getObj(ws, "sigpdf_%s_%d_%s" % (proc, mass, cat))

        #----------
        # adjust fit parameters if specified
        #----------
        sigmaVars = getGaussianVars(ws, "sigma", proc, mass, cat)
        dmuVars = getGaussianVars(ws, "dmu", proc, mass, cat)
        fracVars = getGaussianVars(ws, "frac", proc, mass, cat)

        numGaussians = len(sigmaVars)
        assert numGaussians == len(dmuVars)
        # there is one fraction fewer than there are Gaussian components
        assert numGaussians == len(fracVars) + 1

        for varname, vars in (("sigma", sigmaVars),
                              ("dmu", dmuVars),
                              ):
            for gaussianIndex in range(len(vars)):
                # set the variable range and initial value of this variable
                setVariableRange(fitparams, varname + "%d" % gaussianIndex, vars[gaussianIndex], proc, mass, cat)
            # end of loop over Gaussian components
        # end of loop over variables

        #----------
        # perform the fit
        #----------
        # the fit range defaults to [mass - 5, mass + 5] unless fitparams
        # overrides fitRangeLeft / fitRangeRight
        pdf.fitTo(dataset,
                  ROOT.RooFit.Minimizer("Minuit2"),
                  ROOT.RooFit.Range(mass + getFitParam(fitparams, "fitRangeLeft", proc, mass, cat, - 5),
                                    mass + getFitParam(fitparams, "fitRangeRight", proc, mass, cat, +5)),
                  # take MC statistics error, not error on number of events...
                  ROOT.RooFit.SumW2Error(False),
                  )

        #----------
        # normalization object
        #----------
        sumWeights = dataset.sumEntries()
        # gcs presumably keeps a Python reference alive so the ROOT object is
        # not garbage collected -- TODO confirm
        normVar = ROOT.RooRealVar(pdf.GetName() + "_norm", pdf.GetName() + "_norm", sumWeights, 0, sumWeights); gcs.append(normVar)
        normVar.setConstant(True)
        # 'import' is a Python keyword, hence the getattr
        getattr(ws, 'import')(normVar)
        normValues.append(sumWeights)

        #----------
        # sort the Gaussian components according to the width
        #----------
        # component indices in order of increasing fitted sigma
        indices = sorted(range(numGaussians), key = lambda index: sigmaVars[index].getVal() )

        # instead of reordering the objects, we re-assign the values
        utils.reassignValues(indices, sigmaVars)
        utils.reassignValues(indices, dmuVars)

        # note that for the fractions (which are continued fractions),
        # we must expand them, sort and then collapse again
        # (the values will be different !)
        expandedFracValues = utils.expandContinuedFraction([ x.getVal() for x in fracVars])
        expandedFracValues = utils.reorder(indices, expandedFracValues)
        unexpandedFracValues = utils.collapseContinuedFraction(expandedFracValues)

        for value, var in zip(unexpandedFracValues, fracVars):
            var.setVal(value)

        #----------
        # fix the fitted parameters and read the fitted values
        #----------
        for vars, values in ((sigmaVars, sigmaValues),
                             (dmuVars, dmuValues),
                             (fracVars, fracValues),
                             ):
            # on the first mass point, create one empty value list per component
            if len(values) == 0:
                values.extend([[ ] for i in range(len(vars)) ] )

            # freeze the fitted variables at the fit final values
            # and add the values to a list for interpolation
            for gaussIndex, var in enumerate(vars):
                var.setConstant(True)
                values[gaussIndex].append(var.getVal())

    #----------
    # end of loop over masses
    #----------

    #----------
    # produce the interpolating objects
    #----------
    interpDmuFuncs = []
    interpSigmaFuncs = []
    interpFracFuncs = []

    for varname, values, interpFuncs in (("sigma", sigmaValues, interpSigmaFuncs),
                                         ("dmu", dmuValues, interpDmuFuncs),
                                         ("frac", fracValues, interpFracFuncs)):
        for gaussIndex in range(len(values)):
            funcname = utils.makeGaussianVarname(varname + "func",
                                                 proc,
                                                 None, # mhyp
                                                 cat,
                                                 gaussIndex
                                                 )
            func = utils.makePiecewiseLinearFunction(funcname, mhypVar, allMasses, values[gaussIndex])

            # import this function into the workspace
            getattr(ws, 'import')(func, ROOT.RooFit.RecycleConflictNodes())
            interpFuncs.append(func)
        # end of loop over Gaussian components
    # end of loop over variables

    #----------
    # build the interpolated signal PDF
    #----------
    # example name: sigpdf_vbf_cat6
    suffix = "_".join([
        proc,
        # str(mhyp), # not used here
        cat,
        ])
    pdfname = "sigpdf_" + suffix
    pdf = utils.makeSumOfGaussians(pdfname,
                                   recoMassVar, # reconstructed mass
                                   mhypVar, # Higgs mass hypothesis
                                   interpDmuFuncs,
                                   interpSigmaFuncs,
                                   interpFracFuncs,
                                   massScaleNuisance = massScaleNuisance,
                                   resolutionNuisance = resolutionNuisance,
                                   ); gcs.append(pdf)

    # import this function into the workspace
    getattr(ws, 'import')(pdf, ROOT.RooFit.RecycleConflictNodes())

    #----------
    # build the interpolated normalization function
    #----------
    normfunc = utils.makePiecewiseLinearFunction(pdfname + "_norm", mhypVar, allMasses, normValues); gcs.append(normfunc)

    # import this function into the workspace
    getattr(ws, 'import')(normfunc, ROOT.RooFit.RecycleConflictNodes())
descs[i] = nx.descendants(H, ni) for j in range(num_outputs): nj = nodes[outputs][j] ancs[j] = nx.ancestors(H, nj) input_similarity = np.zeros((num_inputs, num_inputs)) for i in range(num_inputs): for j in range(num_inputs): seti = descs[i] setj = descs[j] input_similarity[i, j] = 2*len(seti & setj)/(len(seti) + len(setj)) indinp = reorder(input_similarity) input_similarity = input_similarity[indinp, :][:, indinp] output_similarity = np.zeros((num_outputs, num_outputs)) for i in range(num_outputs): for j in range(num_outputs): seti = ancs[i] setj = ancs[j] output_similarity[i, j] = 2*len(seti & setj)/(len(seti) + len(setj)) indout = reorder(output_similarity) output_similarity = output_similarity[indout, :][:, indout]
def doc_scan_pipeline(input=PATH, output="./img/scanned_doc.jpg"):
    """Run the document-scanning pipeline on one image.

    The image at ``input`` is resized to the module-level ``width`` x
    ``height``, its largest contour is located and used to warp the page to a
    top-down view, and the adaptively thresholded result is written to
    ``output``.  Every intermediate stage is labelled and assembled into a
    grid image.
    """
    # NOTE(review): cv2.imread returns None for an unreadable path; the
    # conversion below then fails with a cv2.error.
    img = cv2.imread(input)

    # 0. Convert given image from BGR to RGB format.  Everything downstream
    # therefore works on RGB data.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w, _ = img.shape
    img = cv2.resize(img, (width, height))

    # 1. Convert to grayscale
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # 2. Add Gaussian blur
    img_blur = cv2.GaussianBlur(img_gray, (5, 5), 1)

    # 3. Add Canny edge detection
    img_threshold = cv2.Canny(img_blur, 100, 200, L2gradient=True)

    # 3.1 Apply dilation
    kernel = np.ones((3, 3))
    img_threshold = cv2.dilate(img_threshold, kernel, iterations=2)

    # 4. Find all the contours
    img_contours = img.copy()
    img_big_contour = img.copy()
    contours, hierarchy = cv2.findContours(img_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(image=img_contours, contours=contours, contourIdx=-1, color=(0, 255, 0), thickness=5)

    # 5. Find the biggest contour
    # NOTE(review): there is no handling for the case where no suitable
    # contour is found -- confirm what biggest_contour returns then.
    biggest, maxArea = biggest_contour(contours)
    biggest = reorder(biggest)
    cv2.drawContours(image=img_big_contour, contours=biggest, contourIdx=-1, color=(0, 255, 0), thickness=10)

    # 5.1 Draw a rectangle, i.e., 4 lines connecting the 4 dots corresponding to the largest contour
    img_big_contour = draw_rectangle(img_big_contour, biggest, thickness=2)

    pts1 = np.float32(biggest)
    # Destination corners: top-left, top-right, bottom-left, bottom-right.
    pts2 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])

    # 6. Image Warp
    # 6.1 Calculate a 3x3 perspective transform matrix
    matrix = cv2.getPerspectiveTransform(pts1, pts2)

    # 6.2 Apply the perspective matrix to the image
    img_warp_coloured = cv2.warpPerspective(img, matrix, (width, height))

    # 7. Adaptive thresholding
    # The warped image is still RGB (see step 0), so it must be grayscaled
    # with RGB2GRAY; BGR2GRAY would swap the red and blue channel weights.
    img_warp_gray = cv2.cvtColor(img_warp_coloured, cv2.COLOR_RGB2GRAY)
    # ADAPTIVE_THRESH_GAUSSIAN_C is the named form of the former literal 1.
    img_adaptive_th = cv2.adaptiveThreshold(
        img_warp_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 5, 2)

    # 7.1 Apply median blurring to remove tiny speckles of noise
    img_adaptive_th = cv2.medianBlur(img_adaptive_th, 3)

    # Save the document to disk
    cv2.imwrite(output, img_adaptive_th)

    # Add labels to each image
    img = draw_text(img, "Original")
    img_gray = draw_text(img_gray, "Grayscale")
    img_blur = draw_text(img_blur, "Gaussian Blur", pos=(int(width / 4), 50))
    img_threshold = draw_text(img_threshold, "Canny Edge", pos=(int(width / 4), 50))
    img_contours = draw_text(img_contours, "Contours")
    img_big_contour = draw_text(img_big_contour, "Largest Contour", pos=(int(width / 7), 50))
    img_warp_coloured = draw_text(img_warp_coloured, "Warp", pos=(int(width / 3), 50))
    img_adaptive_th = draw_text(img_adaptive_th, "Adaptive Thresholding", pos=(int(width / 7), 50),
                                font_scale=2, font_thickness=6)

    blank_img = np.zeros((height, width, 3), dtype=np.uint8)

    image_list = [
        img,
        img_gray,
        img_blur,
        img_threshold,
        img_contours,
        img_big_contour,
        img_warp_coloured,
        img_adaptive_th,
    ]

    # Combine the images into a grid
    # image_grid returns PIL image, np.asarray() can be used to convert it back to cv2 compatible format
    grid = np.asarray(image_grid(image_list, width, height))
def main():
    """Read an uploaded answer sheet and return the recognised values.

    For a POST request carrying an ``image`` file, the sheet is downscaled,
    its rectangular regions are located, and the exam id, student id and both
    choice areas are warped out, binarised and decoded.  Returns a dict with
    the keys ``examId``, ``studentId`` and ``values``.
    """
    # NOTE(review): any other request falls through and returns None, exactly
    # as before -- confirm whether an explicit error response is wanted.
    if request.method != "POST" or not request.files:
        return None

    def _binarize(area):
        # `area` is cut out of the BGR working image, so BGR2GRAY is the
        # matching conversion; Otsu picks the threshold, ink becomes white.
        gray = cv2.cvtColor(area, cv2.COLOR_BGR2GRAY)
        return cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    binaryImage = request.files["image"].read()
    pil_img = readimage(binaryImage)
    # PIL delivers RGB; from here on the working image is BGR.
    img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

    # Downscale to 40% of the original size before contour detection.
    SCALE_PERCENT = 40
    width = int(img.shape[1] * SCALE_PERCENT / 100)
    height = int(img.shape[0] * SCALE_PERCENT / 100)
    img = cv2.resize(img, (width, height))

    imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgBlur = cv2.GaussianBlur(imgGray, (5, 5), 1)
    imgCanny = cv2.Canny(imgBlur, 10, 50)
    contours, _ = cv2.findContours(imgCanny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # NOTE(review): the fixed indices assume rectContour returns at least six
    # rectangles in a known order -- fewer raises IndexError.
    rects = rectContour(contours)
    firstChoiceAreaContour = reorder(getCornerPoints(rects[1]))
    secondChoiceAreaContour = reorder(getCornerPoints(rects[0]))
    studentIdContour = reorder(getCornerPoints(rects[4]))
    examIdContour = reorder(getCornerPoints(rects[5]))

    firstChoiceAreaImage = separateImageArea(img, firstChoiceAreaContour, 175 * 3, 455 * 3)
    secondChoiceAreaImage = separateImageArea(img, secondChoiceAreaContour, 175 * 3, 455 * 3)
    studentIdAreaImage = separateImageArea(img, studentIdContour, 80 * 2, 190 * 2)
    examIdAreaImage = separateImageArea(img, examIdContour, 40 * 5, 190 * 5)

    examIdThresh = _binarize(examIdAreaImage)
    studentIdThresh = _binarize(studentIdAreaImage)
    firstChoiceAreaThresh = _binarize(firstChoiceAreaImage)
    secondChoiceAreaThresh = _binarize(secondChoiceAreaImage)

    # result
    examId = getExamIdValue(examIdThresh)
    studentId = getStudentIdValue(studentIdThresh)
    studentChoices = getChoiceValue(
        firstChoiceAreaThresh) + getChoiceValue(secondChoiceAreaThresh)

    return {
        "examId": examId,
        "studentId": studentId,
        "values": studentChoices
    }
# Edge map of the blurred image, then all external contours on it.
imgCanny = cv2.Canny(imgBlur, 10, 50)
countours, hierarchy = cv2.findContours(imgCanny,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
cv2.drawContours(imgContours, countours,-1,(0,255,0),5)
# NOTE(review): indexing assumes at least one rectangular contour was found;
# an empty result would raise IndexError -- confirm against utils.rectContour.
rectCon = utils.rectContour(countours)
biggestContour = utils.getCornerPoints(rectCon[0])
#print(biggestContour.shape)
# An empty array means no corner points were found; nothing below runs then.
if biggestContour.size != 0:
    cv2.drawContours(imgBiggestContours, biggestContour, -1, (0,255,0), 10)
    # presumably arranges the corners to match the pt2 order below
    # -- TODO confirm against utils.reorder
    biggestContour = utils.reorder(biggestContour)
    pt1 = np.float32(biggestContour)
    # Destination corners: top-left, top-right, bottom-left, bottom-right.
    pt2 = np.float32([[0,0],[width,0],[0,height],[width,height]])
    matrix = cv2.getPerspectiveTransform(pt1,pt2)
    imgWarpColored = cv2.warpPerspective(img,matrix,(width,height))
    imgWarpGray = cv2.cvtColor(imgWarpColored,cv2.COLOR_BGR2GRAY)
    # Inverse binary threshold at 170; [1] selects the image from the
    # (retval, image) pair that cv2.threshold returns.
    imgThresh = cv2.threshold(imgWarpGray, 170, 255,cv2.THRESH_BINARY_INV )[1]
    #print(imgThresh)
    utils.splitBoxes(imgThresh)