def InitResource(version):
    global database, resource, socket, listfile, tfidfmodel, tfidfdict, table_state_strategy
    if version == 'v1':
        listfile = 'cnn_qa_human_response_name.list'
    if version == 'v2':
        listfile = 'cnn_qa_human_response_name_high_app.list'
    if version == 'v2.5':
        listfile = 'cnn_qa_human_response_name_high_app.list'
        tfidfdict = corpora.Dictionary.load(tfidfname + '.dict')
        tfidfmodel = models.tfidfmodel.TfidfModel.load(tfidfname + '.tfidf')
    if version == 'v3':
        listfile = 'cnn_hr_v1_v2.list'
        tfidfdict = corpora.Dictionary.load(tfidfname + '.dict')
        tfidfmodel = models.tfidfmodel.TfidfModel.load(tfidfname + '.tfidf')
    if version == 'v4':
        listfile = 'cnn_hr_v1_v2_v4.list'
        tfidfdict = corpora.Dictionary.load(tfidfname + '.dict')
        tfidfmodel = models.tfidfmodel.TfidfModel.load(tfidfname + '.tfidf')
    datalist = [line.strip() for line in open(listfile)]
    database = Loader.LoadDataPair(datalist)
    resource = Loader.LoadLanguageResource()
    global TemplateLib, TopicLib, TreeState, Template, model
    TemplateLib = Loader.LoadTemplate(template_list)
    TopicLib = Loader.LoadTopic(topicfile)
    TreeState, Template = Control.Init()
    model = models.Doc2Vec.load('/tmp/word2vec_50')
    if wizard == 2:
        context = zmq.Context()
        socket = context.socket(zmq.REQ)
        socket.connect("tcp://localhost:5555")
    with open('table_state_strategy.pkl') as f:
        table_state_strategy = pickle.load(f)
def b_to_z(setting, n_components):
    logging.info("B -> Z")
    dictionary = loader.load_dictionary(setting.DICTIONARY_PATH)
    dict_vecto_tfidf = loader.load_dict_vecto_tfidf(setting.DICT_VECTO_TFIDF_PATH)
    # sparse_matrix = cp.reduce_dimention_pca(dict_vecto_tfidf.values(), len(dictionary), n_components=500, batch_size=20000)
    # b6 : map id product with nonnegative integer
    list_id_product = dict_vecto_tfidf.keys()
    dict_map_id = map_id_product(list_id_product)
    # b7 : reduce large matrix
    # sparse_matrix = cp.reduce_dimention(dict_vecto_tfidf.values(), len(dictionary), n_components=500, batch_size=20000)
    sparse_matrix = cp.reduce_dimension_svd(dict_vecto_tfidf.values(),
                                            len(dictionary),
                                            n_components=n_components)
    shape = sparse_matrix.shape
    logging.info("shape : " + str(shape))
    dense_matrix = list(sparse_matrix)
    # b8 : build tree
    tree = cp.make_tree(setting,
                        dict_id=dict_map_id.keys(),
                        dict_vecto=dense_matrix,
                        dimension=shape[1],
                        amount_tree=10)
    # b9 : search nns in tree
    dict_result = {}
    for i in range(1000):
        list_nns = tree.get_nns_by_item(i, 11)
        dict_result[i] = list_nns
    dict_product = loader.load_dict_product(setting.DICT_PRODUCT_PATH)
    loader.save_result(setting.DICT_RESULT_PATH + "_" + str(n_components),
                       dict_product, dict_map_id, dict_result)
def apply_thresholding_algorithm(image, method: int = 1, plot: bool = False):
    name = 'Not Set'
    thresholded = None
    if method == 1:
        name = 'Triangle'
        thresholded = _thresh(image, filters.threshold_triangle)
    elif method == 2:
        name = 'Mean'
        thresholded = _thresh(image, filters.threshold_mean)
    elif method == 3:
        name = 'Otsu'
        thresholded = _thresh(image, filters.threshold_otsu)
    elif method == 4:
        name = 'Yen'
        thresholded = _thresh(image, filters.threshold_yen)
    elif method == 5:
        name = 'Minimum'
        thresholded = _thresh(image, filters.threshold_minimum)
    elif method == 6:
        name = 'Isodata'
        thresholded = _thresh(image, filters.threshold_isodata)
    elif method == 7:
        name = 'Li'
        thresholded = _thresh(image, filters.threshold_li)  # pass the function itself, as in the other branches
    print("[DEBUG] Method '" + name + "' was selected for threshold")
    if plot:
        print("[DEBUG] Comparison with original image requested. Plotting..")
        Loader.hist_compare([image, thresholded], ["Original", name])
    return thresholded.astype(np.uint8)
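# Sketch of the method constants used by callers elsewhere in this collection
# (e.g. thresh.THRESH_TRIANGLE in process_image and thresholding_comparison).
# Their defining module is not shown here, so the values below are an assumption:
# plain module-level integers that line up with the `method` codes dispatched above.
THRESH_TRIANGLE = 1
THRESH_MEAN = 2
THRESH_OTSU = 3
THRESH_YEN = 4
THRESH_MINIMUM = 5
THRESH_ISODATA = 6
THRESH_LI = 7

# Example call under that assumption:
# binary = apply_thresholding_algorithm(image, THRESH_OTSU, plot=True)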
def edgesFunctions(img):
    log = edg.laplacian_of_gaussian(img, 2)
    dog = edg.difference_of_gaussian(img, 1.0, 2.5)
    sobelX = edg.sobel(img, 0)
    canny = edg.canny(img, 100, 200, sigma=1.5)
    Loader.hist_compare([log, dog, canny, sobelX], ["LoG", "DoG", "Canny", "Sobel"])
def countInconsistencyFromFile(path, reduceData=False):
    data = Loader.loadExtensionSensitive(path)
    if reduceData:
        data = Loader.reduceRepetitions(data)
    inconsistencyCounter = countInconsistency(data)
    inconsistencyRatio = float(inconsistencyCounter) / len(data)
    return inconsistencyCounter, inconsistencyRatio
def edges_comparison(input_image):
    log = edg.laplacian_of_gaussian(input_image, 2)
    dog = edg.difference_of_gaussian(input_image, 1.0, 2.5)
    sobelX = edg.sobel(input_image, 0)
    canny = edg.canny(input_image, 100, 200, sigma=1.5)
    Loader.hist_compare([log, dog, canny, sobelX], ["LoG", "DoG", "Canny", "Sobel"])
def find_vertical_lines(edgeImage):
    inverse = Loader.inverse_img(edgeImage)
    Loader.print_image(inverse)
    (shapeY, shapeX) = edgeImage.shape
    lineas = []
    for x in np.arange(1, shapeX - 5):
        y = shapeY - 5
        # Look for the first white pixel
        while inverse[y][x] == 0:
            y = y - 1
        # Now at the first white vertical point;
        # move upwards until the first black gap
        while inverse[y][x] != 0:
            y = y - 1
        # Now at the start of the cornea (black)
        while inverse[y][x] == 0:
            y = y - 1
        # Past the cornea: the white vertical run we want to keep starts here
        vertical = []
        while inverse[y][x] != 0:
            vertical.append((y, x))
            y = y - 1
        # Finished the segment of interest; add it to lineas
        lineas.append(vertical.copy())
    return lineas
def make_tree(setting, dict_id, dict_vecto, dimension, amount_tree):
    logging.info("make a tree")
    t = AnnoyIndex(dimension)
    for i in dict_id:
        t.add_item(i, dict_vecto[i])
    t.build(amount_tree)
    loader.save_tree(setting.TREE_PATH, t)
    return t
def __init__(self):
    self.memory = Memory()
    self.loader = Loader(self.memory)
    self.assembler = Assembler()
    self.CI = 0  # 12 bits
    self.ACC = 0  # 8 bits
    self.output = []
def InitResource():
    global database, resource
    datalist = [line.strip() for line in open(listfile)]
    database = Loader.LoadDataPair(datalist)
    resource = Loader.LoadLanguageResource()
    global TemplateLib, TopicLib, TreeState, Template
    TemplateLib = Loader.LoadTemplate(template_list)
    TopicLib = Loader.LoadTopic(topicfile)
    TreeState, Template = Control.Init()
def InitResource(version):
    global TemplateLib, TopicLib, TreeState, Template, model, init_id, joke_id, more_id, dictionary_value, turn_id, wizard, isAlltag, engaged_input, engagement_mode
    global q_table, database, resource, socket, listfile, tfidfmodel, tfidfdict, table_state_strategy
    database = {}
    resource = {}
    listfile = None
    init_id = 0
    joke_id = 0
    more_id = 0
    wizard = 3
    isAlltag = 0
    turn_id = 0
    engaged_input = []
    engagement_mode = 0
    resource_root = 'resource'
    template_list = [
        'template/template_new.txt', 'template/template_end.txt',
        'template/template_open.txt', 'template/template_expand.txt',
        'template/template_init.txt', 'template/template_joke.txt',
        'template/template_back.txt', 'template/template_more.txt'
    ]
    template_list = [path.join(resource_root, name) for name in template_list]
    topicfile = path.join(resource_root, 'topic.txt')
    tfidfname = 'tfidf_reference'
    with open('dictionary_value.pkl') as f:
        dictionary_value = pickle.load(f)
    if version == 'v1':
        listfile = 'cnn_qa_human_response_name.list'
    elif version == 'v2':
        listfile = 'cnn_qa_human_response_name_high_app.list'
    elif version == 'v2.5':
        listfile = 'cnn_qa_human_response_name_high_app.list'
        tfidfdict = corpora.Dictionary.load(tfidfname + '.dict')
        tfidfmodel = models.tfidfmodel.TfidfModel.load(tfidfname + '.tfidf')
    elif version == 'v3':
        listfile = 'cnn_hr_v1_v2.list'
        tfidfdict = corpora.Dictionary.load(tfidfname + '.dict')
        tfidfmodel = models.tfidfmodel.TfidfModel.load(tfidfname + '.tfidf')
    elif version == 'v4':
        listfile = 'cnn_hr_v1_v2_v4.list'
        tfidfdict = corpora.Dictionary.load(tfidfname + '.dict')
        tfidfmodel = models.tfidfmodel.TfidfModel.load(tfidfname + '.tfidf')
    datalist = [line.strip() for line in open(listfile)]
    q_table = pickle.load(open('q_table.pkl'))
    database = Loader.LoadDataPair(datalist)
    resource = Loader.LoadLanguageResource()
    TemplateLib = Loader.LoadTemplate(template_list)
    TopicLib = Loader.LoadTopic(topicfile)
    TreeState, Template = Control.Init()
    model = models.Doc2Vec.load('/tmp/word2vec_50')
    if wizard == 2:
        context = zmq.Context()
        socket = context.socket(zmq.REQ)
        socket.connect("tcp://localhost:5555")
    with open('table_state_strategy.pkl') as f:
        table_state_strategy = pickle.load(f)
def calculate_feature(img):
    print(img)
    with tf.Graph().as_default():
        feature_list = []
        img_list = []
        img = cv2.imread(img)
        # print(img)
        img = camera.resize(img, width=1200)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # this is the right place to put the copy,
        # otherwise it will be empty when the face is too big
        rects = detector(gray, 1)
        with tf.Session() as sess:
            # Load the model
            # we need to load the model first, then load each layer
            Loader.load_model(model)
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            print("Running.....")
            for (i, rect) in enumerate(rects):
                # print(rect)
                shape = predictor(gray, rect)
                shape = camera.shape_to_np(shape)
                (x, y, w, h) = camera.rect_to_coordinate(rect)
                try:
                    img = img[y:y + int(h / 2), x:x + int(w / 2)]
                    img = misc.imresize(img, (160, 160), interp='bilinear')
                    x1, y1, a1 = img.shape
                    # when 4 dimension
                    temp = copy.deepcopy(img)
                    temp = temp.reshape([1, x1, y1, a1])
                    # we put the cropped image to the FaceNet, input shape (1, 160, 160, 3)
                    feed_dict = {
                        images_placeholder: temp,
                        phase_train_placeholder: False
                    }
                    # emb returns the facial feature of shape (1, 512)
                    emb = sess.run(embeddings, feed_dict=feed_dict)
                    feature_list.append(emb.flatten().tolist())
                    img_list.append(img.flatten())
                except ValueError:
                    print("error")
                    return None
            # print(feature_list)
            return feature_list, img_list
def a_to_z(setting, n_components):
    logging.info("A -> Z")
    # b1 : make a folder to contain data
    make_folder(setting.PARENT_FOLDER_PATH)
    make_folder(setting.FOLDER_DATA_PATH)
    # b2 : return a client connecting to server elasticsearch
    es = loader.get_elasticsearch_client(setting.HOST, setting.PORT)
    # b3 : scan data
    dict_raw_product = loader.scan_data(es, setting.DOMAIN)
    # b4 : preprocess raw data
    dict_product = cp.preprocess_data(setting, dict_raw_product)
    # b5 : transform to tfidf vector
    dict_vecto_tfidf, dictionary = cp.transform_vecto_tfidf(setting, cp.split_text(dict_product))
    # b6 : map id product with nonnegative integer
    list_id_product = dict_vecto_tfidf.keys()
    dict_map_id = map_id_product(list_id_product)
    # b7 : reduce large matrix
    # sparse_matrix = cp.reduce_dimention(dict_vecto_tfidf.values(), len(dictionary), n_components=500, batch_size=20000)
    # use the requested n_components (the saved result file is tagged with it below)
    sparse_matrix = cp.reduce_dimension_svd(dict_vecto_tfidf.values(),
                                            len(dictionary),
                                            n_components=n_components)
    shape = sparse_matrix.shape
    logging.info("shape : " + str(shape))
    dense_matrix = list(sparse_matrix)
    # b8 : build tree
    tree = cp.make_tree(setting,
                        dict_id=dict_map_id.keys(),
                        dict_vecto=dense_matrix,
                        dimension=shape[1],
                        amount_tree=10)
    # b9 : search nns in tree
    dict_result = {}
    for i in range(1000):
        list_nns = tree.get_nns_by_item(i, 11)
        dict_result[i] = list_nns
    dict_product = loader.load_dict_product(setting.DICT_PRODUCT_PATH)
    loader.save_result(setting.DICT_RESULT_PATH + "_" + str(n_components),
                       dict_product, dict_map_id, dict_result)
    dict_result_id = {}
    for i in dict_map_id.keys():
        list_nns = tree.get_nns_by_item(i, 11)
        dict_result_id[i] = list_nns
    loader.save_result_id(setting.DICT_RESULT_ID_PATH, dict_map_id, dict_result_id)
    # b10 : copy file to hdfs
    loader.save_file_to_hdfs(setting.FOLDER_HDFS_PATH, setting.FILE_NAME_HDFS,
                             setting.DICT_RESULT_ID_PATH)
def cache(self):
    # Test for file existence, if so, just load it in
    try:
        self.docs = Loader.docs_core(self.qno)
    except:
        # If not, load docs
        print "Loading Docs for", self.qno
        parser = CoreNLPParser.CoreNLPParser(host=self.host, port=self.port)
        docs = Loader.docs(self.qno)
        parsed_docs = []
        for doc in docs:
            print "Parsing Docno", doc['docno'],
            sys.stdout.flush()
            parsed_doc = {'docno': doc['docno']}
            for k in ['leadpara', 'headline', 'text']:
                if doc[k] is None:
                    continue
                jsons = []
                for paragraph in doc[k]:
                    print ".",
                    sys.stdout.flush()
                    # Move the retry here so that we don't have to re-parse
                    # a whole lot of docs
                    for attempt in range(3):
                        try:
                            json = parser.parse(unidecode(paragraph))
                            break  # for
                        except jsonrpc.RPCTransportError as e:
                            if attempt + 1 == 3:
                                print
                                print "---"
                                print unidecode(paragraph)
                                print "---"
                                sys.stdout.flush()
                                raise Exception()
                            continue  # for
                        except Exception as e:
                            if attempt + 1 == 3:
                                print
                                print "---"
                                print e
                                print "---"
                                raise e
                            continue  # for
                    jsons.append(json)
                parsed_doc[k] = jsons
            parsed_docs.append(parsed_doc)
            print "done"
            sys.stdout.flush()
        with open(DIR + '/parsed_docs_core/top_docs.%d' % self.qno, 'wb') as f:
            pickle.dump(parsed_docs, f)
        self.docs = parsed_docs
def process_image(img):
    # Loader.print_image(img)
    # print("[DEBUG] Showing VISUAL denoising algorithm comparison")
    # denoising_comparison(img)
    # print("[DEBUG] Showing HISTOGRAM denoising algorithm comparison")
    # denoising_comparison(img, True)
    # DENOISING IMAGE
    denoised_img = smooth.median_filter(img, 9)
    denoised_img = smooth.median_filter(denoised_img, 7)
    # PRINT DENOISED IMAGE AND HISTOGRAM
    # Loader.print_image(denoised_img)
    # Loader.hist_and_cumsum(denoised_img)
    # thresholding_comparison(denoised_img)
    th_img = thresh.apply_thresholding_algorithm(denoised_img, thresh.THRESH_TRIANGLE)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
    stretched = cv2.morphologyEx(th_img, cv2.MORPH_ERODE, kernel)
    back, front = thresh.get_regions(denoised_img, stretched)
    # Loader.hist_compare([back, front], ["Back", "Front"])
    # Loader.print_image(front)
    eq = Loader.equalization(front.astype("uint8"))
    eq = bright_and_contrast(eq, 2.8, 80)
    eq = smooth.gaussian(eq, 2.5)
    Loader.print_image(eq)
    # Loader.hist_and_cumsum(eq)
    # EDGE DETECTION
    # edgesFunctions(eq)  # Comparison of different edge detection methods
    edges = edg.laplacian_of_gaussian(eq, 2)
    Loader.print_image(edges)
    # Fill the cornea area with white pixels
    dilated = fill_cornea(edges)
    # Loader.print_image(dilated)
    # Calculate distances in the cornea-lens region
    # lineas = dw.find_vertical_lines(dilated)
    # diferencias, posiciones = dw.calculate_differences(lineas)
    # dw.draw_graph_distance(diferencias, posiciones)
    # output_image = dw.lines_image(lineas, img)
    # Surround the cornea area and lens edges with a visible, thin line
    (i, contornos, jerarquia) = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = []
    for c in contornos:
        if cv2.contourArea(c) < 1000:
            continue
        else:
            cnts.append(c)
    cv2.drawContours(img, cnts, -1, (0, 0, 255), 3)
    # Loader.print_image(img)
    return img
def InitResource():
    global database, resource, socket
    datalist = [line.strip() for line in open(listfile)]
    database = Loader.LoadDataPair(datalist)
    resource = Loader.LoadLanguageResource()
    global TemplateLib, TopicLib, TreeState, Template
    TemplateLib = Loader.LoadTemplate(template_list)
    TopicLib = Loader.LoadTopic(topicfile)
    TreeState, Template = Control.Init()
    context = zmq.Context()
    # print("connecting to server")
    socket = context.socket(zmq.REQ)
    socket.connect("tcp://localhost:5555")
def cache(self):
    # Test for file existence, if so, just load it in
    try:
        self.docs = Loader.docs_core(self.qno)
    except:
        # If not, load docs
        print "Loading Docs for", self.qno
        parser = CoreNLPParser.CoreNLPParser(host=self.host, port=self.port)
        docs = Loader.docs(self.qno)
        parsed_docs = []
        for doc in docs:
            print "Parsing Docno", doc['docno'],
            sys.stdout.flush()
            parsed_doc = {'docno': doc['docno']}
            for k in ['leadpara', 'headline', 'text']:
                if doc[k] is None:
                    continue
                jsons = []
                for paragraph in doc[k]:
                    print ".",
                    sys.stdout.flush()
                    # Move the retry here so that we don't have to re-parse
                    # a whole lot of docs
                    for attempt in range(3):
                        try:
                            json = parser.parse(unidecode(paragraph))
                            break  # for
                        except jsonrpc.RPCTransportError as e:
                            if attempt + 1 == 3:
                                print
                                print "---"
                                print unidecode(paragraph)
                                print "---"
                                sys.stdout.flush()
                                raise Exception()
                            continue  # for
                        except Exception as e:
                            if attempt + 1 == 3:
                                print
                                print "---"
                                print e
                                print "---"
                                raise e
                            continue  # for
                    jsons.append(json)
                parsed_doc[k] = jsons
            parsed_docs.append(parsed_doc)
            print "done"
            sys.stdout.flush()
        with open(DIR + '/parsed_docs_core/top_docs.%d' % self.qno, 'wb') as f:
            pickle.dump(parsed_docs, f)
        self.docs = parsed_docs
def cache(self):
    try:
        self.questions = Loader.questions_core()
    except:
        print "Parsing Questions..."
        parser = CoreNLPParser.CoreNLPParser(host=self.host, port=self.port)
        qs = Loader.questions()
        parsed_qs = {}
        for qno, q in qs.iteritems():
            json = parser.parse(unidecode(q['question']))
            parsed_qs[qno] = json
        with open(DIR + '/parsed_questions_core.txt', 'wb') as f:
            pickle.dump(parsed_qs, f)
        self.questions = parsed_qs
def question_loader():
    questions = Loader.questions()
    answers = Loader.answers()
    for qno, question in questions.iteritems():
        print qno
        docs_posne = Loader.docs_posne(qno)
        docs = Loader.docs(qno)
        lines, docnos = search(question, docs, docs_posne)
        evaluate([w.lower() for w in answers[int(qno)]['answers'][0].split()], lines)
        count = 0
        for docno in answers[int(qno)]['docnos']:
            if docno in docnos:
                count += 1
        print count
def cache(self):
    try:
        self.questions = Loader.questions_core()
    except:
        print "Parsing Questions..."
        parser = CoreNLPParser.CoreNLPParser(host=self.host, port=self.port)
        qs = Loader.questions()
        parsed_qs = {}
        for qno, q in qs.iteritems():
            json = parser.parse(unidecode(q['question']))
            parsed_qs[qno] = json
        with open(DIR + '/parsed_questions_core.txt', 'wb') as f:
            pickle.dump(parsed_qs, f)
        self.questions = parsed_qs
class Runner(object):
    def __init__(self, path, startstates=[]):
        self.path = path
        self.fsm = FSM()
        self.loader = Loader(path)
        self.startstates = startstates

    def prepare(self):
        self.loader.load()
        self.fsm.startstates = self.startstates
        self.fsm.actionset = self.loader.actions
        self.fsm.explore()

    def run(self):
        self.fsm.execute()

    def savesvg(self):
        self.fsm.savesvg()
def setup_screen(self):
    self.screen = pygame.display.set_mode((screen_size[0], screen_size[1]))
    pygame.display.set_caption("Asteroids")
    self.image = Loader.load_image("Background.png")
    self.rect = self.image.get_rect()
    self.rect.left, self.rect.top = [0, 0]
    self.backgroundcolor = 255, 255, 255
def check_sentiment_vader(self):
    loader = ld.Loader(self.size)
    sent_analyzer = SentimentIntensityAnalyzer()

    def apply_score(tweet):
        loader.loading()
        return sent_analyzer.polarity_scores(tweet)

    def sentiment_value(compound):
        value = ''
        if compound < -0.05:
            value = 'negative'
        elif compound > 0.05:
            value = 'positive'
        else:
            value = 'neutral'
        return value

    f = lambda x: apply_score(x)
    g = lambda x: sentiment_value(x)
    sentiment_scores = self.tweets.apply(f)  # returns data frame of dictionaries
    sentiment_scores = sentiment_scores.apply(pd.Series)  # splits dictionaries with keys as columns
    sentiment_compound = sentiment_scores['compound']
    sentiment_values = sentiment_compound.apply(g)
    return sentiment_values, sentiment_compound
def prepare_data(setting):
    # b1 : make a folder to contain data
    make_folder(setting.PARENT_FOLDER_PATH)
    make_folder(setting.FOLDER_DATA_PATH)
    # b2 : return a client connecting to server elasticsearch
    es = loader.get_elasticsearch_client(setting.HOST, setting.PORT)
    # b3 : scan data
    dict_raw_product = loader.scan_data(es, setting.DOMAIN)
    # b4 : preprocess raw data
    dict_product = cp.preprocess_data(setting, dict_raw_product)
    # b5 : transform to tfidf vector
    cp.transform_vecto_tfidf(setting, cp.split_text(dict_product))
def thresholding_comparison(img):
    # Thresholding algorithms precalculated for comparison
    triangle = thresh.apply_thresholding_algorithm(img, thresh.THRESH_TRIANGLE)
    mean = thresh.apply_thresholding_algorithm(img, thresh.THRESH_MEAN)
    otsu = thresh.apply_thresholding_algorithm(img, thresh.THRESH_OTSU)
    yen = thresh.apply_thresholding_algorithm(img, thresh.THRESH_YEN)
    minimum = thresh.apply_thresholding_algorithm(img, thresh.THRESH_MINIMUM)
    isodata = thresh.apply_thresholding_algorithm(img, thresh.THRESH_ISODATA)
    # li = thresh.apply_thresholding_algorithm(img, thresh.THRESH_LI)
    thresholded_imgs = [img, triangle, mean, otsu, yen, minimum, isodata]
    thresholded_titles = [
        "Original", "Triangle", "Mean", "Otsu", "Yen", "Minimum", "Isodata"
    ]
    Loader.hist_compare(thresholded_imgs, thresholded_titles)
def Visualize(filename):
    data_path = param.DATA_DIR + filename + ".pcd"
    calib_path = param.CALIB_DIR + filename + ".txt"
    label_path = param.LABEL_DIR + filename + ".txt"
    print "data path : " + data_path
    print "calib path : " + calib_path
    print "label path : " + label_path
    pc_pub = rospy.Publisher("cnn_3d_points_raw", PointCloud2, queue_size=100000)
    rospy.init_node("cnn_3d")
    header = std_msgs.msg.Header()
    header.stamp = rospy.Time.now()
    header.frame_id = "cnn_3d"
    marker_array_pub = rospy.Publisher("cnn_3d_anchor_obj", MarkerArray, queue_size=1000)
    sleep_rate = rospy.Rate(1)
    point_cloud, gt_objectness = Loader.get_visualize_input(data_path, calib_path, label_path)
    points = pc2.create_cloud_xyz32(header, point_cloud[:, :3])
    marker_array = Utils.get_marker_array(gt_objectness, type="anchor")
    while not rospy.is_shutdown():
        pc_pub.publish(points)
        marker_array_pub.publish(marker_array)
        sleep_rate.sleep()
def NotAugmentedRun(db, version, estm, loss, optimizer, metric=[], callback=[], verbose=1):
    schema = Loader.getSchema(db, version, estm)
    schema.summary()
    schema.plot(estm)
    estimator = getattr(Estimator, estm)
    estimator = estimator(schema.getModel())
    estimator.compile(loss=loss, optimizer=optimizer, metric=metric)
    history = estimator.fit(db, verbose=verbose, callbacks=callback)
    history.plot(estimator.name + '_' + db.name + '_NotAugmented')
    print(estimator.evaluate(db.X_train, db.Y_train()))
    y_pred = np.argmax(estimator.predict(db.X_train), axis=-1)
    print(Utils.classificationReport('train', db.y_train, y_pred))
    print(estimator.evaluate(db.X_test, db.Y_test()))
    y_pred = np.argmax(estimator.predict(db.X_test), axis=-1)
    print(Utils.classificationReport('test', db.y_test, y_pred))
    Utils.rocCurve(
        db.name + '_' + schema.name + '_' + estimator.name + '_NotAugmented',
        db.Y_test(), estimator.predict(db.X_test), db.info['n_cls'])
    schema.saveWeights(estimator.name + '_' + db.name + '_NotAugmented')
    schema.extract(estimator.name + '_NotAugmented', db)
def visualize(self):
    global device
    arr = []
    colors = []
    train_loader, validation_loader = Loader.load_data(from_pickle=True, batch_size=1)
    for x in train_loader:
        input, label = x
        input = input.to(device)
        self.net(input)
        res = self.res.cpu().detach().numpy()[0]
        arr.append(res)
        colors.append(label.detach().numpy()[0])
    X = TSNE(n_components=2).fit_transform(arr)
    x = [r[0] for r in X]
    y = [r[1] for r in X]
    fig, ax = plt.subplots()
    scatter = ax.scatter(x, y, c=colors)
    legend = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes")
    ax.add_artist(legend)
    plt.savefig('TSNE_test' + str(layer) + '.png')
def loadAndCount(pathToFile, reduceData=False):
    loadedData = Loader.loadExtensionSensitive(pathToFile)
    if reduceData:
        loadedData = Loader.reduceRepetitions(loadedData)
    rows = len(loadedData)
    cols = len(loadedData[0])
    fields = rows * cols
    dicts = countOccurency(loadedData)
    pDicts = countProbabilities(dicts)
    eDicts = countEntropyOfDicts(pDicts)
    sumEnt, meanEnt = countEntropyInData(eDicts)
    metricEnt = sumEnt / float(rows)
    bitsToSaveData = sumEnt * float(rows)
    numberOfInstances = len(loadedData)
    valuesOfAtts, importantAtts = countValuesOfAttributes(dicts)
    return (sumEnt, meanEnt, metricEnt, bitsToSaveData, numberOfInstances, valuesOfAtts, importantAtts)
def main(fenin, evalin, fenout, evalout):
    with open(fenin, 'r') as fin:
        fen = [s[:-1] for s in fin.readlines()]
    with open(evalin, 'r') as fin:
        streval = [s[:-1] for s in fin.readlines()]
    lfens = [Loader.fenToInputs(f) for f in fen]
    fen, streval = removeDuplicates(fen, streval, lfens)
    evals = [Loader.evalSimplify(e) for e in streval]
    fen, streval = unbias(fen, streval, evals)
    fout = open(fenout, 'a+')
    eout = open(evalout, 'a+')
    for f, e in zip(fen, streval):
        print(f, file=fout)
        print(e, file=eout)
    fout.close()
    eout.close()
def load(self, record_iterator, fetch_NCBI_taxonomy=False):
    """Load a set of SeqRecords into the BioSQL database.

    record_iterator is either a list of SeqRecord objects, or an
    Iterator object that returns SeqRecord objects (such as the output
    from the Bio.SeqIO.parse() function), which will be used to
    populate the database.

    fetch_NCBI_taxonomy is a boolean flag allowing or preventing
    connection to the taxonomic database on the NCBI server (via
    Bio.Entrez) to fetch a detailed taxonomy for each SeqRecord.

    Example:
    from Bio import SeqIO
    count = db.load(SeqIO.parse(open(filename), format))

    Returns the number of records loaded.
    """
    db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid,
                                      fetch_NCBI_taxonomy)
    num_records = 0
    for cur_record in record_iterator:
        num_records += 1
        db_loader.load_seqrecord(cur_record)
    return num_records
def __init__(self, keyword):
    self.URL = URLer.URLer()
    self.load = Loader.Loader()
    self.Parse = Parser.Parser()
    keyword = self.Parse.quote_(keyword)
    self.Out = Outer.Outer()
    self.root = self._url + keyword
def Setup(PersonID):
    print("Starting Tweet Scraper")
    global maindict
    PersonInfo = {PersonID: {"oldmax_id": 0, "laststop_id": 0}}
    print("Parsing account id:{}".format(PersonID))
    try:
        ParserInfo = Loader.ParserInfoReader()
        print("Successfully loaded information for account id: {}".format(PersonID))
    except:
        print("Couldn't load local information for account id: {}".format(PersonID))
        pass
    else:
        if PersonID in ParserInfo.keys():
            PersonInfo[PersonID]['oldmax_id'] = ParserInfo[PersonID]['oldmax_id']
            PersonInfo[PersonID]['laststop_id'] = ParserInfo[PersonID]['laststop_id']
    if ratelimit():
        start = API.user_timeline(id=PersonID, count=1, tweet_mode='extended')
    else:
        print("API Limit Reached")
        quit()
    newmax_id = start[0]._json['id']
    oldmax_id = PersonInfo[PersonID]['oldmax_id']
    laststop = PersonInfo[PersonID]['laststop_id']
    if oldmax_id == 0:
        PersonInfo[PersonID]['oldmax_id'] = newmax_id
    ParseInfo = dict()
    ParseInfo["PersonID"] = PersonID
    ParseInfo["PersonInfo"] = PersonInfo
    ParseInfo["newmax"] = newmax_id
    ParseInfo["start"] = start
    return ParseInfo
def TweetParser(tweets):
    for tweet in tweets:
        if tweet._json['full_text'][:2] != "RT":
            # By excluding tweets where the first two letters are RT,
            # only non-retweets (i.e. tweets written by the user) are scraped
            maindict[tweet._json['id']] = tweet._json['full_text']
    Loader.TweetFileWriter(maindict)
def occurence_counter(self, key):
    # emots, hash etc. counter - returns a descending sorted array
    # and a pandas data frame which stores the hash count for every tweet
    key_list = []
    occurence_counter = []
    loader = ld.Loader(self.size)
    for tweet in self.tweets:
        matches = re.findall(patterns[key], tweet)
        if matches:
            # change hash text to lowercase to avoid case sensitivity
            matches = [match.lower() for match in matches]
            occurence_counter.append(len(matches))
            for ele in matches:
                counter = 0
                if len(ele) != 1:
                    for tag in key_list:
                        if ele == tag[0]:
                            tag[1] += 1
                            counter = 1
                    if counter == 0:
                        key_list.append([ele, 1])
        else:
            occurence_counter.append(0)
        loader.loading()
    sorted_key_list = sorted(key_list, key=lambda x: x[1], reverse=True)
    sorted_key_list = pd.DataFrame(sorted_key_list)
    return sorted_key_list, occurence_counter
def parse_real_answers_in_docs():
    answers = Loader.answers()
    found_answers = {}
    for i in range(201, 400):
        found_answers[i] = {
            'question': answers[i]['question'],
            'answers': [],
            'docnos': []
        }
        docs = Loader.docs(i)
        for answer in answers[i]['answers']:
            found_docs = []
            for d_idx, d in enumerate(docs):
                if d['text'] is not None:
                    for para in d['text']:
                        if answer.lower() in para.lower():
                            found_docs.append((d_idx, d['docno']))
            if len(found_docs) > 0:
                found_answers[i]['answers'].append(answer)
                found_answers[i]['docnos'].append(found_docs)
    with open(DIR + '/parsed_real_answers.txt', 'wb') as f:
        pickle.dump(found_answers, f)
def init():
    global pluginNumber
    global modules
    plugins = Loader.plugins(modulesPath)
    plugins.crawler()
    plugins.load()
    pluginNumber = len(plugins.pluginTree)
    modules = plugins.modules
    Banner(VERSION, pluginNumber)
def on_edit_lexicon(self, selection):
    lexicon_data, row_paths = selection.get_selected_rows()
    names_store = self.builder.get_object("generated_names")
    names_store.clear()  # TODO use a loader from file first
    for path in row_paths:
        filepath = lexicon_data.get_value(lexicon_data.get_iter(path), 0)
        for word, weight in Loader.loadLexiconFile(filepath).items():
            names_store.append((word, weight))
def selectLexiconTokenizerGeneratorFilters(cfg):
    # TODO cache small lexicons in memory
    files = cfg.get('lexicon', '<selected_files>') or cfg.get('lexicon', 'files') or []
    if cfg.get('lexicon', 'use_patterns', default=True):
        lexicon = Loader.loadLexiconsFromPatterns(files)
    else:
        lexicon = Loader.loadLexicons(files)
    if len(lexicon) == 0:
        raise EmptyLexicon()
    tokenizer = selectTokenizer(cfg, lexicon)

    def to_token_sequence(keyval):
        return tokenizer.tokenize(keyval[0]), keyval[1]

    tokenized_lexicon = dict(map(to_token_sequence, lexicon.items()))
    generator = selectGenerator(cfg, tokenized_lexicon)
    filters = selectFilters(cfg, lexicon)
    return lexicon, tokenizer, generator, filters
def evaluate_answers(filename):
    fake_answers = load_fake_answers(filename)
    real_answers = Loader.real_answers()
    questions = Loader.questions()
    # print len([True for ra in real_answers.itervalues() if len(ra['answers']) == 0])
    with open('answer_eval_%s' % filename, 'w') as f:
        writer = csv.writer(f)
        for i in range(201, 400):
            real_answer = real_answers[i]
            if len(real_answer['answers']) > 0:
                m_all_counts = evaluate_answer(real_answer['answers'], fake_answers[i])
                correct = False
                for m_counts, keyword_len in m_all_counts:
                    for m_count in m_counts:
                        if float(m_count) >= 1.0 * keyword_len:
                            correct = True
                writer.writerow([i, questions[i]['question_classification'],
                                 1 if correct else 0, m_all_counts])
                if not correct:
                    print i, questions[i]['question_classification'], real_answer['question'], real_answer['answers'], m_all_counts
def answer_all(answerer, use_chunk=False):
    with open('data/naive_out.txt', 'w') as fout:
        questions = Loader.questions()
        for qno, question in questions.iteritems():
            docs = CoreNLPLoader(qno)
            print qno
            answer = answerer(question, docs, use_chunk)
            print answer
            if answer is None:
                fout.write("%d top_docs.%d nil\n" % (qno, qno))
            else:
                for ans in answer[:5]:
                    fout.write("%d top_docs.%d %s\n" % (qno, qno, ans))
def construct(self):
    Membre.membres[self.membre] = {}
    # Only one object is retrieved here (a Blender-style object, not a real one)
    nao = Loader().load(self.membre)[0]
    self.multipleColors = nao.multipleColors()
    self.changeColor = nao.material  # getFaceNbColors()
    Membre.membres[self.membre]["boolText"] = nao.hasTexture
    Membre.membres[self.membre]["len"] = len(nao.tabCoordInd)
    self.boolText = nao.hasTexture
    V = []
    for a in range(len(nao.tabCoordInd)):
        V.append(nao.tabCoord[nao.tabCoordInd[a] - 1])
    VN = []
    for a in range(len(nao.tabNormInd)):
        VN.append(nao.tabNorm[nao.tabNormInd[a] - 1])
    # Create the VBO
    v = numpy.array([V], dtype=numpy.float32)
    Membre.membres[self.membre]["vVBO"] = vbo.VBO(v)
    # Create the VBO
    vn = numpy.array([VN], dtype=numpy.float32)
    Membre.membres[self.membre]["vnVBO"] = vbo.VBO(vn)
    if self.boolText:
        VT = []
        for a in range(len(nao.tabTextInd)):
            VT.append(nao.tabText[nao.tabTextInd[a] - 1])
        # Create the VBO
        vt = numpy.array([VT], dtype=numpy.float32)
        Membre.membres[self.membre]["vtVBO"] = vbo.VBO(vt)
def start_game(self):
    self.game = Loader.load_for_tribegame()
    self.game.parent = self
def create_all(self):
    return Loader.load_npcs()
    # chunks.append( "nil" )
    # n += 1
    return chunks


if __name__ == '__main__':
    argparser = argparse.ArgumentParser()
    # argparser.add_argument('-c', action='store_true', dest="chunk", help="chunk answers?")
    argparser.add_argument('-n', type=int, action='store', default=5, dest="n_chunks", help="no. of answers to give")
    argparser.add_argument('-l', type=int, action='store', default=400, dest="l", help="first question # to answer")
    argparser.add_argument('-u', type=int, action='store', default=600, dest="u", help="1 + last question # to answer")
    argparser.add_argument('-p', type=str, action='store', default="output_", dest="out_prefix", help="Prefix of output files")
    args = argparser.parse_args()
    qf = QuestionFeatures()
    questions = Loader.questions()
    f_nochunk = open(args.out_prefix + "nochunk.txt", 'w')
    f_chunk = open(args.out_prefix + "chunk.txt", 'w')
    for qno in range(args.l, args.u):
        a = Answerer(questions[qno], qf, qno)
        answers = a.answer()
        # pprint(answers)
        # if args.chunk:
        #     chunks = a.chunk(answers, n_chunks=args.n_chunks)
        # else:
        #     chunks = a.nonchunk(answers, n_chunks=args.n_chunks)
        # print "\n".join( ["%d top_docs.%d "%(qno,qno) + chunk for chunk in chunks] )
        chunks = a.nonchunk(answers, n_chunks=args.n_chunks)
        f_nochunk.write("\n".join(["%d top_docs.%d " % (qno, qno) + chunk for chunk in chunks]) + "\n")
        # Only prints chunked version to stdout
def load_image(self):
    self.image, self.rect = Loader.load_image('ball.png', -1)
    # self.image = pygame.transform.scale(self.image, (25, 25))
    self.image = pygw.transform(self.image, (25, 25))
    KNeighborsClassifier(5),
    KNeighborsClassifier(7),
    KNeighborsClassifier(9),
    # SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB(),
    LDA(),
    # QDA()
]

combinedClf = cmb.CombinedClassifier(classifiers)
wholeDataSet = np.array(ldr.loadData("C:\Users\CJank\Desktop\Dyskretyzator\data\\iris_number.data"))
X = wholeDataSet[:, 0:-1]
y = wholeDataSet[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5)
combinedClf.fit(X_train, y_train)
results = combinedClf.predict(X_train)
CVresults = cross_val_score(combinedClf, X, y, cv=5)
CVres = np.array(CVresults).mean()
print CVres

resultsOfCV = []
for ds in files:
    wholeDataSet = np.array(ldr.loadData(ds))
    X = wholeDataSet[:, 0:-1]
    y = wholeDataSet[:, -1]
def populate_lexicon(self):
    filepaths = Loader.findFilesFromPatterns(self.cfg_.get("lexicon", "files", default=[]))
    for filepath in sorted(filepaths):
        self.load_lexicon(filepath)
    self.treeview_.get_selection().select_all()
def load_image(self):
    self.image, self.rect = Loader.load_image(self.name)
    self.image = pygw.transform(self.image, (75, 75))
def __init__(self, path, startstates=[]):
    self.path = path
    self.fsm = FSM()
    self.loader = Loader(path)
    self.startstates = startstates
def __init__(self):
    self.qs = Loader.questions()
    self.qs_core = CoreNLPQuestionLoader()
    meanEntropy = sumEntropy / len(entropyDict)
    return (sumEntropy, meanEntropy)


def loadAndCount(pathToFile, reduceData=False):
    loadedData = Loader.loadExtensionSensitive(pathToFile)
    if reduceData:
        loadedData = Loader.reduceRepetitions(loadedData)
    rows = len(loadedData)
    cols = len(loadedData[0])
    fields = rows * cols
    dicts = countOccurency(loadedData)
    pDicts = countProbabilities(dicts)
    eDicts = countEntropyOfDicts(pDicts)
    sumEnt, meanEnt = countEntropyInData(eDicts)
    metricEnt = sumEnt / float(rows)
    bitsToSaveData = sumEnt * float(rows)
    numberOfInstances = len(loadedData)
    valuesOfAtts, importantAtts = countValuesOfAttributes(dicts)
    return (sumEnt, meanEnt, metricEnt, bitsToSaveData, numberOfInstances, valuesOfAtts, importantAtts)


if __name__ == "__main__":
    file = "C:\Users\CJank\Desktop\\tmp\\wineDscr.arff"
    loadedData = Loader.loadExtensionSensitive(file)
    dicts = countOccurency(loadedData)
    pDicts = countProbabilities(dicts)
    eDicts = countEntropyOfDicts(pDicts)
    sumEnt, meanEnt = countEntropyInData(eDicts)
    print(dicts)
    print(pDicts)
    print(eDicts)
    print("Sum: " + str(sumEnt) + " Mean: " + str(meanEnt))
def init_resource():
    global resource
    resource = Loader.load_language_resource(idf_file)
def load_game():
    hide_menu()
    current_load = Loader.load_save_state()
    looper = game_looper.Looper(root)
    looper.load(current_load.door, current_load.person)
        consistent = True
        for b in range(min(a + 1, len(loadedData) - 1), len(loadedData)):
            if checkIfInconsistencyOccurs(loadedData[a], loadedData[b]):
                knownInconsistencies.add(a)
                knownInconsistencies.add(b)
    return len(knownInconsistencies)


def checkIfInconsistencyOccurs(instanceA, instanceB):
    numberOfAtts = len(instanceA)
    inconsistency = False
    for col in range(numberOfAtts):
        if col < numberOfAtts - 1:
            # a differing attribute value means the instances cannot conflict
            if instanceA[col] != instanceB[col]:
                break
        else:
            # identical attributes but a different class value is an inconsistency
            if instanceA[col] != instanceB[col]:
                inconsistency = True
    return inconsistency


def countInconsistencyFromFile(path, reduceData=False):
    data = Loader.loadExtensionSensitive(path)
    if reduceData:
        data = Loader.reduceRepetitions(data)
    inconsistencyCounter = countInconsistency(data)
    inconsistencyRatio = float(inconsistencyCounter) / len(data)
    return inconsistencyCounter, inconsistencyRatio


if __name__ == "__main__":
    data = Loader.loadExtensionSensitive("C:\Users\CJank\Desktop\Dyskretyzator\Results_\\australianDiscretizationResults_Reduced.txt")
    c = countInconsistency(data)
    print c
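# Illustrative sketch (not from the original source): checkIfInconsistencyOccurs
# treats two rows as inconsistent only when every attribute matches but the final
# class column differs. The toy rows below are made-up values.
example_a = ['sunny', 'hot', 'yes']
example_b = ['sunny', 'hot', 'no']
example_c = ['rainy', 'hot', 'no']
assert checkIfInconsistencyOccurs(example_a, example_b)      # same attributes, different class
assert not checkIfInconsistencyOccurs(example_a, example_c)  # attributes differ, so no conflict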