def main():
    global nDocuments, docIdList

    clusterSets = [8, 16, 32, 64]
    clusterTiming = []
    clusterEntropy = []

    # preprocess init
    p.init()
    loadClassMasterMap()

    nDocuments = len(p.chiFeatureVectors)
    # 15000 documents took 742 s to run, i.e. about 12 minutes
    # nDocuments = 10000
    docIdList = random.sample(p.chiFeatureVectors.keys(), nDocuments)

    for n in clusterSets:
        p.nClusters = n

        start = time.time()
        startKMeans()
        end = time.time()
        print "\nK-Means Clustering for", n, "clusters took", end - start, "seconds"

        entropy = displayClusterCounts()
        clusterTiming.append(end - start)
        clusterEntropy.append(entropy)

        print n, end - start, entropy
        print '-------------------------------------------------------------------------------------\n'

    plotGraphs(clusterSets, clusterTiming, clusterEntropy)
def main():
    global isVisited, isClustered, neighbourCount, docList, sampledDocId, sampledDocList

    p.init(False)
    docList = p.chiFeatureList

    # sample 20000 of the 21000 documents
    # sampledDocList was never initialised in the original snippet; reset it here
    sampledDocList = []
    sampledDocId = random.sample(list(xrange(21000)), 20000)
    for docId in sampledDocId:
        sampledDocList.append(docList[docId])

    # knnHistogram()
    # drawHistogram()

    isVisited = [0] * len(sampledDocList)
    isClustered = [0] * len(sampledDocList)
    neighbourCount = [0] * len(sampledDocList)
    print(len(sampledDocList))

    start = time.time()
    findCluster(8)
    # fileObj = open("cluster_result.txt", "w")
    # fileObj.write(str(isClustered))
    # fileObj.close()
    evaluate()
    end = time.time()
    print "Running time", end - start
def main(argv):
    file_path = argv[1]
    num_questions = argv[2]

    extract_sentences = preprocess.init(file_path)
    questions = tag.main(extract_sentences, num_questions)
def main(argv):
    file_path = argv[1]
    num_questions = argv[2]

    extract_sentences = preprocess.init(file_path)
    questions = ask_sys.init(extract_sentences)
    for question in questions:
        print(question)
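# A minimal entry-point sketch (an assumption, not part of the original snippets):
# both main(argv) variants above index argv[1] and argv[2] directly, so a caller
# would pass sys.argv through unchanged. The script name "ask.py" used in the
# usage string below is hypothetical.
import sys

if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: python ask.py <file_path> <num_questions>")
    main(sys.argv)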
def main(csv, feature):
    """
    Draw a bar chart that ranks the importance of feature combinations in order.

    Parameters
    ----------
    csv : str
        Path to the csv file that contains the features.
    feature : str
        Name(s) of the feature(s) used by the random forest classifier.

    Returns
    -------
    None
    """
    # The order in this list should be the same as the columns in features.csv.
    # column_names = ["NetworkType", "SubType", "ClusteringCoefficient", "DegreeAssortativity",
    #                 "m4_1", "m4_2", "m4_3", "m4_4", "m4_5", "m4_6"]
    # features: "sepal_length", "sepal_width", "petal_length", "petal_width"
    column_names = ["NetworkType", "SubType"]
    column_names += list(feature)

    isSubType = True
    csv_file = csv
    # at_least filters out classes with fewer instances than this threshold.
    at_least = 0
    X, Y, sub_to_main_type, feature_order = init(csv_file, column_names, isSubType, at_least)

    # number of iterations for the multi-class classification
    N = 1000
    # Valid methods are: "RandomOver", "RandomUnder", "SMOTE" and "None"
    sampling_method = "None"
    print("sampling_method: %s" % sampling_method)
    print("Number of instances: %d" % len(Y))

    Matrix, NetworkTypeLabels, sum_accuracy, list_important_features = \
        sum_confusion_matrix(X, Y, sub_to_main_type, feature_order, isSubType, sampling_method, N)

    average_matrix = np.asarray(
        list(map(lambda row: list(map(lambda e: e / N, row)), Matrix)))
    print("average accuracy: %f" % (float(sum_accuracy) / float(N)))

    plot_feature_importance(list_important_features, feature_order)
def main():
    column_names = ["NetworkType", "SubType", "ClusteringCoefficient", "DegreeAssortativity",
                    "m4_1", "m4_2", "m4_3", "m4_4", "m4_5", "m4_6"]
    isSubType = True
    at_least = 1
    X, Y, sub_to_main_type, feature_order = init("features.csv", column_names, isSubType, at_least)

    N = 100
    c1_name = "ER Network"
    c2_name = "PeerToPeer"

    # c2 to c1
    c1_X, c1_Y, c2_X, c2_Y = separator(X, Y, c1_name, c2_name)
    plot(c1_X, c2_X, feature_order, c1_name, c2_name)
def main():
    column_names = ["NetworkType", "SubType", "ClusteringCoefficient", "DegreeAssortativity",
                    "m4_1", "m4_2", "m4_3", "m4_4", "m4_5", "m4_6"]
    isSubType = True
    at_least = 1
    X, Y, sub_to_main_type, feature_order = init("features.csv", column_names, isSubType, at_least)

    N = 100

    # synthesized to real
    real_X, real_Y, synthesized_X, synthesized_Y = separator(X, Y)
    bp_tuple_L = base_to_predict(synthesized_X, synthesized_Y, real_X, real_Y)
    # `Synthesized` is not defined in this snippet; it is presumably a
    # module-level label set defined elsewhere in the original file.
    Ys, accum_dic = make_layers(bp_tuple_L, Synthesized)
    plot_accumulation(Ys, accum_dic)

    # real to synthesized
    bp_tuple_L = base_to_predict(*separator(X, Y))
    Ys, accum_dic = make_layers(bp_tuple_L, Y)
    plot_accumulation(Ys, accum_dic)
def main(csv, features, iter):
    # example feature flags:
    # -f degree -f betweenness -f closeness -f eigencentrality -f coreness
    # -f layerness -f pagerank -f sum_friends_friends -f transitivity
    column_names = ["NetworkType", "SubType"] + list(features)
    isSubType = True  # use SubType as the labels for classification
    at_least = 0
    X, Y, sub_to_main_type, feature_order = init(csv, column_names, isSubType, at_least)

    N = iter
    # network subtype one is interested in
    one = "seed"
    X_converted, Y_converted = convert_one_to_many(X, Y, one)

    list_accuracies, list_important_features, list_auc = many_classifications(
        X_converted, Y_converted, feature_order, N)

    print("average accuracy: %f" % (float(sum(list_accuracies)) / float(N)))
    print("average AUC: %f" % (float(sum(list_auc)) / float(N)))

    dominant_features = plot_feature_importance(list_important_features, feature_order)
    first = dominant_features[0][0][0]
    second = dominant_features[1][0][0]
    if first == second:
        second = dominant_features[1][1][0]

    Y_converted_string_labels = [one if y == 1 else "non-seed" for y in Y_converted]
    x_label = first
    y_label = second
    x_index = feature_order.index(x_label)
    y_index = feature_order.index(y_label)
    plot_2d(np.array(X_converted), np.array(Y_converted_string_labels),
            x_index, y_index, x_label, y_label)
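# A hedged sketch of how main(csv, features, iter) might be exposed on the command
# line with the repeated "-f" flags shown in the comment above, using click.
# The option names, short flags, and defaults here are assumptions; the original
# project may already decorate main() directly instead of using a wrapper like this.
import click


@click.command()
@click.option("--csv", "-c", type=click.Path(exists=True), help="path to the features csv")
@click.option("--feature", "-f", "features", multiple=True, help="feature column; repeatable")
@click.option("--iter", "-i", "iterations", default=100, type=int, help="number of classification runs")
def cli(csv, features, iterations):
    # delegate to main() above; the tuple of -f values mirrors column_names += list(features)
    main(csv, features, iterations)


if __name__ == "__main__":
    cli()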
opt['pretrained_words'] = True
opt['vocab_size'] = embedding.size(0)
opt['embedding_dim'] = embedding.size(1)
opt['pos_size'] = len(meta['vocab_tag'])
opt['ner_size'] = len(meta['vocab_ent'])
opt['cuda'] = args.cuda
opt['classes'] = {'Y': 0, 'N': 1}
opt['id_classes'] = {0: 'Yes', 1: 'No'}
opt['interact'] = True

BatchGen.pos_size = opt['pos_size']
BatchGen.ner_size = opt['ner_size']

model = LegalQAClassifier(opt, embedding, state_dict)

w2id = {w: i for i, w in enumerate(meta['vocab'])}
tag2id = {w: i for i, w in enumerate(meta['vocab_tag'])}
ent2id = {w: i for i, w in enumerate(meta['vocab_ent'])}
init()


def interact_entailment(article, query):
    annotated = annotate(('interact-entailment', article, query, 'Y'))
    model_in = to_id(annotated, w2id, tag2id, ent2id)
    model_in = next(iter(BatchGen([model_in], opt, batch_size=1,
                                  gpu=args.cuda, evaluation=True)))
    prediction, values = model.interact_(model_in)
    max_probs = torch.max(prediction, 1)[1].item()
from Routenetwork import Routenetwork
from preprocess import init, distanceList
from Astar import Astar
from Latlong import ShowNodesOnMap

data = init()
nodes = data[0]
edges = data[1]
nodedict = data[2]

g = Routenetwork()
for item in edges:
    g.addEdge(item)
g_edges = g.getEdges()

distances = distanceList(nodes, nodedict, g_edges)
for items in nodes:
    distances[(items, items)] = 0

start = 'Birla Institute of Technology Hyderabad'
end = 'RGIA'
path = Astar(g, nodedict, distances, start, end)

print('Final Route :')
print('****Start****')
for item in path:
    print(item)
print('****End****')

ShowNodesOnMap(path, nodedict)
import preprocess
import assemble
import mapreads
import validate
import multialign
import findorfs
import findreps
import abundance
import annotate
import fannotate
import scaffold
import findscforfs
import propagate
import classify
import postprocess
import generic

# initialize submodules
preprocess.init(readlibs, asmcontigs, skipsteps, selected_programs["assemble"],
                run_fastqc, selected_programs["preprocess"])
assemble.init(readlibs, skipsteps, selected_programs["assemble"], asmcontigs,
              userKmerSupplied == False)
mapreads.init(readlibs, skipsteps, selected_programs["mapreads"], savebtidx, ctgbpcov, lowmem)
validate.init(readlibs, skipsteps, selected_programs["validate"], asmScores)
findorfs.init(readlibs, skipsteps, selected_programs["findorfs"], min_ctg_len, min_ctg_cvg, read_orfs)
findreps.init(readlibs, skipsteps)
multialign.init(readlibs, skipsteps, forcesteps, selected_programs["multialign"], refgenomes)
annotate.init(readlibs, skipsteps, selected_programs["annotate"], nofcpblast)
fannotate.init(skipsteps)
abundance.init(readlibs, skipsteps, forcesteps, selected_programs["annotate"])
scaffold.init(readlibs, skipsteps, retainBank)
findscforfs.init(readlibs, skipsteps, selected_programs["findorfs"])
propagate.init(readlibs, skipsteps, selected_programs["annotate"])
classify.init(readlibs, skipsteps, selected_programs["annotate"], lowmem,
              0 if not isolate_genome else 100)
postprocess.init(readlibs, skipsteps, selected_programs["annotate"])
generic.init(skipsteps, readlibs)
import preprocess
import assemble
import mapreads
import findorfs
import findreps
import abundance
import annotate
import fannotate
import scaffold
import findscforfs
import propagate
import classify
import postprocess

# initialize submodules
preprocess.init(readlibs, skipsteps, selected_programs["assemble"], run_fastqc, filter)
assemble.init(readlibs, skipsteps, selected_programs["assemble"], usecontigs)
mapreads.init(readlibs, skipsteps, selected_programs["assemble"],
              selected_programs["mapreads"], savebtidx, ctgbpcov, lowmem)
findorfs.init(readlibs, skipsteps, selected_programs["assemble"],
              selected_programs["findorfs"], min_ctg_len, min_ctg_cvg)
findreps.init(readlibs, skipsteps)
annotate.init(readlibs, skipsteps, selected_programs["classify"], nofcpblast)
fannotate.init(skipsteps)
abundance.init(readlibs, skipsteps, forcesteps, selected_programs["classify"])
scaffold.init(readlibs, skipsteps, retainBank, selected_programs["assemble"])
findscforfs.init(readlibs, skipsteps, selected_programs["findorfs"])
propagate.init(readlibs, skipsteps, selected_programs["classify"])
classify.init(readlibs, skipsteps, selected_programs["classify"])
postprocess.init(readlibs, skipsteps, selected_programs["classify"])

try:
    dlist = []
import preprocess
import assemble
import mapreads
import benchmark
import validate
import multialign
import findorfs
import findreps
import abundance
import classify
import classifyreads
import fannotate
import scaffold
import findscforfs
import propagate
import bin
import postprocess

# initialize submodules
preprocess.init(readlibs, asmcontigs, skipsteps, selected_programs["assemble"],
                run_fastqc, selected_programs["preprocess"])
assemble.init(readlibs, skipsteps, selected_programs["assemble"], asmcontigs,
              (userKmerSupplied == False and isolate_genome))
mapreads.init(readlibs, skipsteps, selected_programs["mapreads"], savebtidx, ctgbpcov, lowmem)
benchmark.init(readlibs, skipsteps, availableRulers_dict["classifyreads"])
validate.init(readlibs, skipsteps, selected_programs["validate"], asmScores)
findorfs.init(readlibs, skipsteps, selected_programs["findorfs"], min_ctg_len, min_ctg_cvg, read_orfs)
findreps.init(readlibs, skipsteps)
multialign.init(readlibs, skipsteps, forcesteps, selected_programs["multialign"], refgenomes)
classify.init(readlibs, skipsteps, selected_programs["classify"], nofcpblast)
_, frame = cap.read()

# frame copies for the initial processing passes
initial_frame = frame
final_frame = frame

# Initially, assume that the grid has not been preprocessed
gridProcessed = False

# Preprocessing starts here ----------------------------------------------------------------------

# finding initial corner points -------------------------------------------------------------
# call the init function to get the dilated frame
initial_dil = pre.init(frame)

# get the corners of the sudoku grid.
# This is the first run, hence there is no angle correction.
initial_corner = pre.contourEndPoints(initial_dil)

# proceed only if contours were found
if initial_corner != 0:
    initial_corner = np.float32(initial_corner)

    # perspective transformation
    initial_pers_trans = cv2.getPerspectiveTransform(initial_corner, puzzle_corner)
    initial_puzzle_frame = cv2.warpPerspective(initial_frame, initial_pers_trans, (477, 477))
    final_puzzle_frame = initial_puzzle_frame  # initial value