def main(args):
    """Build the feature table for a corpus and write it to a CSV file.

    The corpus at ``args.filepath`` is loaded into a dataframe (comma
    separated when ``args.data_type == 'csv'``, tab separated otherwise),
    the co-occurrence files found under ``<args.savepath>cooc/`` are
    collected, ``Feature`` derives the features from them, and the resulting
    dataframe is written to ``<args.savepath>result_0~10.csv``.

    Args:
        args: Namespace providing ``savepath``, ``filepath`` and
            ``data_type``. ``savepath`` is concatenated verbatim with the
            file names, so it should end with a path separator.
    """
    print("Get cooc of each doc from corpus")
    # NOTE(review): the instance is not used below; it is still constructed
    # in case Processing() has side effects -- confirm before removing.
    cooc_model = Processing()
    savepath = args.savepath
    coocpath = savepath + 'cooc/'
    filepath = args.filepath

    # os.makedirs instead of shelling out to `mkdir`: portable, safe for
    # paths containing spaces or shell metacharacters, and it raises on
    # failure instead of silently ignoring it.
    os.makedirs(coocpath, exist_ok=True)

    if args.data_type == 'csv':
        df = pd.read_csv(filepath)
    else:
        df = pd.read_csv(filepath, sep='\t')

    print("Creation Finished.. Starts new job")
    print(" ")
    print("Make a graph")
    cooc_path_list = get_cooc_filenames(coocpath)
    feature_model = Feature(doc_path_list=cooc_path_list, dataframe=df)

    print("Make all features and load all to dataframe ")
    df = feature_model.make_df_from_dataset()
    df.to_csv(savepath + 'result_0~10.csv')  # change name
    print("Completed")
def compute_feature_vec(orders):
    """Build the list of monomial features for the requested orders.

    For every degree ``d`` (0 to 3) contained in ``orders`` one ``Feature``
    is created per ordered index tuple of length ``d`` drawn from
    ``range(n_dimensions_x)``. Features are emitted by ascending degree and,
    within one degree, in lexicographic index order.

    Args:
        orders: Collection of monomial degrees to generate.

    Returns:
        list: The generated ``Feature`` objects (empty when no degree is
        requested).

    Raises:
        ValueError: If any requested order lies outside 0..3.
    """
    if any(order > 3 or order < 0 for order in orders):
        raise ValueError('Can only create monomials of degree 0 to 3')

    monomials = []
    for degree in range(4):
        if degree not in orders:
            continue
        # Grow the index lists one position at a time; the last position
        # varies fastest, exactly as nested for-loops would enumerate them.
        index_sets = [[]]
        for _ in range(degree):
            index_sets = [prefix + [dim]
                          for prefix in index_sets
                          for dim in range(n_dimensions_x)]
        monomials.extend(Feature(np.array(indices), 'multiply')
                         for indices in index_sets)
    return monomials
def __init__(self):
    """Set the extension limits and load the graph and feature data."""
    # Tunables of the clique extension.
    self.threshold_score, self.size_threshold, self.k = 1.0, 20, 57
    self.top_10_cliques = './top_cliques/'

    # Readers run in a fixed order: adjacency list, full feature vectors,
    # then the top-k feature table (each from its own reader instance).
    graph_reader = Graph()
    self.adj_list = graph_reader.read_adjacency_list()
    feature_reader = Feature()
    self.feature_vector = feature_reader.read_features()
    topk_reader = Feature()
    self.topk_features = topk_reader.read_topk_features(self.k)
def main():
    """Stream EMG data from a Myo armband, plot its MAV and move the cursor.

    Every 40 ms the most recent samples are pulled from the buffer. Once a
    full 2-D window of 512 samples is available, its mean absolute value is
    computed, plotted and handed to the cursor controller. The loop ends
    when the hub stops running or 'C' (shift-c) is pressed.
    """
    window = 512  # at 200 Hz, 512 samples are ~2.5 s of the most recent data

    # ---- setup myo-python (do not change) --------------------------------
    myo.init(sdk_path='../../myo_sdk')  # compile Python binding to Myo's API
    hub = myo.Hub()  # Python instance of the Myo API
    if not ConnectionChecker().ok:  # no connection -> nothing to acquire
        quit()
    # -----------------------------------------------------------------------

    # Custom processor of Myo events: collects new data in a buffer (queue).
    emg_buffer = Buffer(buffer_len=window)
    mav_calculator = Feature(input_len=window)
    # Multichannel plotter for visualisation; the Myo armband has 8 channels.
    plotter = MultichannelPlot(nchan=8, xlen=window)
    cursor_driver = cursor(200)

    # The Myo API collects data on a parallel thread while this block runs;
    # passing the buffer here is what associates it with the API.
    with hub.run_in_background(emg_buffer):
        print('Streaming EMG ... Press shift-c to stop.')
        while hub.running:
            time.sleep(0.040)

            # Pull the recent EMG data and turn it into a numpy matrix.
            samples = np.array(
                [entry[1] for entry in emg_buffer.get_emg_data()])

            # Only a complete 2-D window is processed; checking ndim first
            # keeps shape[0] from being compared on a non-matrix array.
            if samples.ndim == 2 and samples.shape[0] == window:
                mav = np.array(mav_calculator.MAV(samples).T)
                plotter.update_plot(mav)
                cursor_driver.move_cursor(mav)

            if keyboard.is_pressed('C'):
                print('Stop.')
                break
def add_feature(self, id_, time=None):
    """Attach the anatomy feature ``id_`` unless it is already present.

    A ``KeyError`` raised while constructing the feature is swallowed and
    nothing is added. If the new feature occupies a slot, whatever currently
    sits in that slot is removed first.

    Args:
        id_: Identifier handed to ``Feature``.
        time: Accepted for interface compatibility; not used here.
    """
    if self.has_feature(id_):
        return

    try:
        candidate = Feature(id_, 'anatomy_features')
    except KeyError:
        return

    if candidate.slot is not None:
        self.remove_feature_by_slot(candidate.slot)
    self.features.append(candidate)
def create(path, total, index):
    """Generate ``total`` accepted problems and write them, numbered, to ``path``.

    Candidates are drawn with ``que_creation(index)`` and wrapped in a
    ``Feature``; a candidate is kept only when ``check`` accepts it against
    the problems collected so far. The output file holds one line per
    problem in the form ``"<n>. <problem>"``.

    Args:
        path: Destination file; overwritten, UTF-8 encoded.
        total: Number of problems to produce. Zero or a negative value
            yields an empty file.
        index: Passed through unchanged to ``que_creation``.
    """
    problems = []

    # Keep drawing until enough candidates have been accepted.
    # NOTE(review): this never terminates if que_creation(index) cannot
    # yield ``total`` candidates that check() accepts.
    while len(problems) < total:
        expression, symbol = que_creation(index)
        candidate = Feature(expression, symbol)
        if check(candidate, problems):
            problems.append(candidate)

    with open(path, 'w', encoding='utf-8') as out:
        out.writelines(
            str(number) + '. ' + problem.problem + '\n'
            for number, problem in enumerate(problems, start=1)
        )
def create(path, total, index):
    """Generate ``total`` accepted problems and write them, numbered, to ``path``.

    A first pass draws ``total`` candidates via ``que_creation(index)`` and
    keeps those that ``check`` accepts; the number of rejected candidates is
    printed as ``counter:<n>``. The shortfall is then topped up until
    exactly ``total`` problems have been collected. (The previous top-up
    loop ran over ``range(counter)``, which is evaluated once, so rejections
    during the top-up left the output short.)

    Args:
        path: Destination file; overwritten, UTF-8 encoded.
        total: Number of problems to produce. Zero or a negative value
            yields an empty file.
        index: Passed through unchanged to ``que_creation``.
    """
    problems = []
    rejected = 0

    # First pass: one draw per requested problem.
    for _ in range(total):
        expression, symbol = que_creation(index)
        candidate = Feature(expression, symbol)
        if check(candidate, problems):
            problems.append(candidate)
        else:
            rejected += 1
    print("counter:%d" % rejected)

    # Top-up: keep drawing until the shortfall is really filled.
    # NOTE(review): this never terminates if que_creation(index) cannot
    # yield ``total`` candidates that check() accepts.
    while len(problems) < total:
        expression, symbol = que_creation(index)
        candidate = Feature(expression, symbol)
        if check(candidate, problems):
            problems.append(candidate)

    with open(path, 'w', encoding='utf-8') as out:
        out.writelines(
            str(number) + '. ' + problem.problem + '\n'
            for number, problem in enumerate(problems, start=1)
        )
def merge_characteristics(self, name, merge_threshold):
    """Merge stored characteristics called ``name`` with their closest partner.

    For every characteristic the partner with the smallest distance is
    determined among those whose identifiers ``idents_disjoint`` accepts.
    If the smallest distance found overall is greater than
    ``1 - merge_threshold`` nothing is merged. Otherwise each remaining
    best-fit pair is merged: the merged first entry is saved and the second
    entry is removed from the collection.

    Args:
        name: Value of the ``name`` field selecting the characteristics.
        merge_threshold: Merging happens only when the smallest pairwise
            distance is at most ``1 - merge_threshold``.

    Returns:
        bool: ``False`` when the distance check rejects merging, ``True``
        otherwise.
    """
    logging.debug("Merging characterics")
    characteristics = self.db.characteristics.find({"name": name})
    meta = Meta(name, self.db)
    chars = list(characteristics)

    # Find the "best fit" (other characteristic with minimal distance) for
    # each characteristic.
    best_fits = [None]*len(chars)  # per entry: None or (own index, best-fit index, distance)
    best_distance = 1.
    for first_i, first in enumerate(chars):
        for second_i, second in enumerate(chars):
            # NOTE(review): `is not` compares the `_id` objects by identity;
            # presumably meant to skip pairing an entry with itself --
            # confirm that `!=` is not required here.
            if (first['_id'] is not second['_id']) and (idents_disjoint(first['ident'], second['ident'])):
                distance = Feature.from_db(first).distance_to(Feature.from_db(second), meta.get_attr_ranges())
                if (not best_fits[first_i]) or (distance < best_fits[first_i][2]):
                    best_fits[first_i] = (first_i, second_i, distance)
                if distance < best_distance:
                    best_distance = distance

    if best_distance > (1-merge_threshold):
        return False  # signal that no characteristic needed to be merged

    # NOTE(review): only the global minimum is compared with the threshold;
    # the loop below merges every remaining pair without checking that
    # pair's own distance.
    for bf in best_fits:
        if not bf: continue  # no eligible partner was found for this entry
        if not best_fits[bf[1]]: continue  # partner was already merged (or never had a best fit)
        first = Feature.from_db(chars[bf[0]])
        second = Feature.from_db(chars[bf[1]])
        first.merge(second)
        self.db.characteristics.save(first.db_entry())
        self.db.characteristics.remove({"_id": second._id})
        # Mark both entries as consumed so later iterations skip them.
        best_fits[bf[0]] = None
        best_fits[bf[1]] = None
        # TODO recalculate best_fits with same bf[0]
    return True  # signal that the merge pass ran
def _load_feature(self, name, type_name, virtual=False, virtual_function_code=None):
    """Return the feature ``name``, creating and registering it when missing.

    An existing feature is fetched and refreshed; a missing one is created
    through ``Feature.create_feature``, registered and given the next
    sequence order, then discovered. In both cases the virtual function code
    is stored on the feature and its ``format_function`` is mapped over the
    datastore column before the refresh/discover step.

    Args:
        name: Feature name, also used as the datastore key.
        type_name: Type name forwarded to ``Feature.create_feature``.
        virtual: Forwarded to ``Feature.create_feature``.
        virtual_function_code: Stored on the feature as-is.

    Returns:
        The loaded or newly created feature.
    """
    already_known = name in self._features

    if already_known:
        feature = self.get_feature(name)
    else:
        feature = Feature.create_feature(self, name, type_name, virtual)
        self.set_feature(name, feature)
        feature.seq_order = self._get_next_seq_order()

    # Shared by both paths: remember the code, then force the value types.
    feature.virtual_function_code = virtual_function_code
    self.get_datastore().map(name, feature.format_function)

    if already_known:
        feature._refresh()
    else:
        feature._discover()
    return feature
def parseFeaturedElement(self, element, stream):
    """Parse an element (Terminal or Nonterminal) that has features attached.

    The target element is parsed normally, then each parse's semantic value
    is unified with the attached features. When the unification result is
    truthy it is yielded as the semantic value; otherwise that parse is
    dropped.

    Args:
        element: Object exposing ``target`` and ``features``.
        stream: Input handed to the underlying parser.

    Yields:
        The unified value of every parse whose unification succeeded.

    Raises:
        ValueError: If the target is neither a Terminal nor a Nonterminal
            (raised when iteration starts, since this is a generator).
    """
    target = element.target
    if isinstance(target, Terminal):
        parser = self.parseTerminal
    elif isinstance(target, Nonterminal):
        parser = self.parseNonterminal
    else:
        raise ValueError("Features can only be attached to Terminals and Nonterminals")

    for aparse in parser(target, stream):
        unified = Feature.unify(element.features, aparse)
        # One pre-formatted argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Unifying  %s %s" % (element.features, aparse))
        # NOTE(review): a falsy unification result is treated as failure;
        # confirm Feature.unify never returns a falsy value on success.
        if unified:
            yield unified
def add_feature(self, id_):
    """Create the feature ``id_`` from ``self.features_data_dict`` and store it."""
    store = self.features.append
    store(Feature(id_, self.features_data_dict))
import sys  # not referenced in the visible part of this script

from features import Feature

# Entry point: the Feature object is only built when this file is executed
# as a script, not when it is imported.
if __name__ == '__main__':
    ui = Feature()
def _export_features(doc_paths, df, result_path):
    """Announce the feature stage, build the features and write them to ``result_path``."""
    print(" ")
    print("Creation Finished.. Starts new job")
    print(" ")
    print("Make a graph")
    feature_model = Feature(doc_path_list=doc_paths, dataframe=df)
    print("Make all features and load all to dataframe ")
    df = feature_model.make_df_from_dataset()
    df.to_csv(result_path)
    print("Completed")


def _process_table(data_type, filepath, cooc_model, savepath, coocpath):
    """Create co-occurrence files for the selected rows of a csv/tsv corpus."""
    first_row, last_row = 18087, 18200  # change index here

    # NOTE(review): an earlier comment remarked that the path is currently a
    # directory; the call below reads ``filepath`` as a single file.
    if data_type == 'csv':
        df = pd.read_csv(filepath)
    else:
        df = pd.read_csv(filepath, sep='\t')

    texts = df['text'][first_row:last_row]
    # The failure log is opened with a context manager so it is closed even
    # if something outside the per-row try block raises.
    with tqdm(total=len(texts)) as pbar, \
            open(savepath + "no_processed_index.txt", 'a',
                 encoding='utf-8') as failures:
        failures.write("Not process index:\n")
        for row, text in enumerate(texts, start=first_row):
            try:
                pbar.update(1)
                cooc_model.cooc(text=text,
                                savepath="{0}/{1}.csv".format(coocpath, row))
            except Exception as e:
                # Best effort: record the failing row and keep going.
                failures.write("{}, index:{}\n".format(e, row))

    # NOTE(review): a directory string is passed as ``doc_path_list`` here,
    # while the text branch passes a list of file names -- confirm Feature
    # accepts both.
    _export_features(coocpath, df, savepath + 'result.csv')


def _process_documents(cooc_model, savepath):
    """Create co-occurrence files for the fake/true document folders."""
    path_fake = savepath + '/data/fake'
    path_true = savepath + '/data/true'
    # Both document listings are taken before any output is written into
    # the same folders.
    doc_path_list_f = get_doc_filenames(path_fake)
    doc_path_list_t = get_doc_filenames(path_true)

    # Label 0 for the fake documents, 1 for the true ones.
    doc_label = [0] * len(doc_path_list_f) + [1] * len(doc_path_list_t)
    df = pd.DataFrame(doc_label, columns=['label'])

    batches = (
        ("co-occurrence matrix creation - fake news", path_fake,
         doc_path_list_f),
        ("co-occurrence matrix creation - true news", path_true,
         doc_path_list_t),
    )
    for description, folder, doc_paths in batches:
        with tqdm(total=len(doc_paths), desc=description) as pbar:
            for idx, doc_path in enumerate(doc_paths):
                pbar.update(1)
                cooc_model.cooc(filepath=doc_path,
                                savepath="{0}/{1}.csv".format(folder, idx))

    cooc_f_list = get_cooc_filenames(document_path=path_fake)
    cooc_t_list = get_cooc_filenames(document_path=path_true)
    _export_features(cooc_f_list + cooc_t_list, df,
                     savepath + '/data/' + 'result.csv')


def main(args):
    """Create co-occurrence files for a corpus and export its feature table.

    ``args.data_type`` selects the input form: ``'csv'``/``'tsv'`` read a
    table from ``args.filepath`` and process its ``text`` column, while
    ``'txt'``/``'text'`` process the documents below ``sample_data//data/fake``
    and ``sample_data//data/true``. The previous test
    ``data_type == 'csv' or 'tsv'`` was always true, which made the text
    branch unreachable.

    Args:
        args: Namespace providing ``filepath`` and ``data_type``.

    Raises:
        ValueError: If ``args.data_type`` is not one of the supported values.
    """
    print("Get cooc of each doc from corpus")
    cooc_model = Processing()
    savepath = "sample_data/"
    coocpath = savepath + 'cooc/'
    filepath = args.filepath

    # Portable replacement for shelling out to `mkdir`; raises on failure.
    os.makedirs(coocpath, exist_ok=True)

    if args.data_type in ('csv', 'tsv'):
        _process_table(args.data_type, filepath, cooc_model, savepath,
                       coocpath)
    elif args.data_type in ('txt', 'text'):
        _process_documents(cooc_model, savepath)
    else:
        raise ValueError(
            "Unsupported data_type: {!r}".format(args.data_type))
class ExtendFeatureClique():
    '''
    Takes in a clique and extends it iteratively: in every round the
    candidate vertices whose top-k feature values agree best with the
    current clique members are appended, until no candidates remain or the
    clique would grow beyond size_threshold.
    '''

    def __init__(self):
        # NOTE(review): threshold_score and top_10_cliques are not read by
        # the methods visible in this class.
        self.threshold_score = 1.0
        self.size_threshold = 20
        self.k = 57  # number of top features requested from read_topk_features
        self.top_10_cliques = './top_cliques/'
        self.adj_list = Graph().read_adjacency_list()
        self.feature_vector = Feature().read_features()
        self.topk_features = Feature().read_topk_features(self.k)

    def extend(self, clique, index):
        """Extend ``clique`` in place with the best-scoring candidate vertices.

        @param clique, list of nodes in the current clique; it is modified
               in place (its last entry is stripped of newlines, new nodes
               are appended) and also returned
        @param index, key into ``self.adj_list`` whose entry supplies the
               candidate nodes
        @return the extended clique
        """
        clique_size = len(clique)
        degree_num = clique_size * (clique_size - 1.0)
        # Work on a copy so the stored adjacency entry is not modified.
        nodes = self.adj_list[index][:]
        # Strip newlines from the last entries.
        # NOTE(review): presumably both lists were read line by line from a
        # file -- confirm. Raises IndexError if either list is empty.
        nodes[-1] = nodes[-1].strip('\n')
        clique[-1] = clique[-1].strip('\n')
        # Clique members are not candidates.
        for vertex in clique:
            if vertex in nodes:
                nodes.remove(vertex)
        while True:
            if len(nodes) == 0:
                break
            d = {}     # node -> feature agreement score with the clique
            nums = {}  # node -> 2 * number of clique vertices listing the node
            max_benefit = 0.0
            for node in nodes:
                increment = 0
                score = 0.0
                # NOTE(review): max_score is computed but never read.
                # `reduce` is a builtin only on Python 2; on Python 3 it must
                # be imported from functools -- confirm which applies here.
                max_score = (reduce(
                    lambda x, y: x + y,
                    self.topk_features.values())) * clique_size * 1.0
                for vertex in clique:
                    # Add the weight of every top-k feature that both the
                    # candidate and the clique vertex have with equal values.
                    for key, value in self.topk_features.items():
                        if key in self.feature_vector[
                                node] and key in self.feature_vector[vertex]:
                            if self.feature_vector[node][
                                    key] == self.feature_vector[vertex][key]:
                                score += value
                    if node in self.adj_list[vertex]:
                        increment += 1
                d[node] = (score * 1.0)
                nums[node] = 2 * increment
                max_benefit = max(max_benefit, d[node])
            # Account for every node that reaches the best score (ties
            # included) before deciding whether to add them.
            for key, value in d.items():
                if value == max_benefit:
                    degree_num += nums[key]
                    clique_size += 1
            # NOTE(review): degree_score is computed but never read.
            degree_score = (degree_num * 1.0) / (clique_size *
                                                 (clique_size - 1.0) * 1.0)
            if clique_size > self.size_threshold:
                # Adding this round's winners would exceed the size limit.
                break
            else:
                for key, value in d.items():
                    if value == max_benefit:
                        clique.append(key)
                        nodes.remove(key)
        return clique
n_dimensions_x) # Feature transform transform = True constant = True first = True second = True third = True exponential = False feature_vec = [] if transform: # constant if constant: feature_vec = [Feature(np.array([]), 'multiply')] # first order monomials: linear (additional features: 15, total 16) if first: for i in range(n_dimensions_x): feature_vec.append(Feature(np.array([i]), 'multiply')) if second: # second order monomials: quadratic (additional features: 15*15 = 225, total 241) for i in range(n_dimensions_x): for j in range(n_dimensions_x): feature_vec.append(Feature(np.array([i, j]), 'multiply')) if third: for i in range(n_dimensions_x):
def __init__(self, basis):
    """Start with an empty feature list, then register the basis feature.

    Args:
        basis: Identifier from which the basis ``Feature`` is built using
            the ``'anatomy_features'`` table.
    """
    self.features = []

    basis_feature = Feature(basis, 'anatomy_features')
    self.basis = basis_feature
    self.add_feature(basis_feature)

    # Both state values start at zero.
    self.wetness = self.stretch = 0
def feature_extraction(document):
    """Run ``Feature`` on ``document.filename`` and return the resulting ``df``.

    The extraction is executed with ``hop_length=512``.
    """
    extractor = Feature(document.filename)
    extracted = extractor.execute(hop_length=512)
    return extracted.df