def load_row_by_row(filename):
    """Load a data file row by row.

    Parameters
    ----------
    filename : str
        The file containing the data to load.

    Yields
    ------
    list
        A line of the file, parsed into a list.
    """
    from ast import literal_eval as make_list

    with open(filename, "r") as f:
        for n, line in enumerate(f, start=1):
            if n % 10000000 == 0:
                print(n, "lines processed")
            try:
                row = make_list(line.strip())
            except (ValueError, SyntaxError):
                # literal_eval raises these when a line is not a valid literal.
                print("Error at line", n)
                continue
            yield row

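# A minimal usage sketch for load_row_by_row; the demo file written below is a
# hypothetical stand-in for the real data file, where each line is assumed to
# hold one Python list literal.
if __name__ == "__main__":
    with open("rows_demo.txt", "w") as demo:
        demo.write("[1, 2, 3]\n[4, 5]\n")
    for row in load_row_by_row("rows_demo.txt"):
        print(row)  # [1, 2, 3] then [4, 5]
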
def replace_lists(self, params):
    """Recursively convert string-encoded lists in ``params`` into real lists."""
    from ast import literal_eval as make_list

    for key, val in params.items():
        # A value such as "[1, 2]" is parsed back into a Python list.
        if isinstance(val, str) and val.startswith("[") and val.endswith("]"):
            params[key] = make_list(val)
        elif isinstance(val, dict):
            self.replace_lists(val)

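# A minimal sketch of how replace_lists behaves; the Demo class and the sample
# dict are hypothetical, only the method above comes from the source.
class Demo:
    pass

Demo.replace_lists = replace_lists

d = Demo()
params = {"layers": "[64, 32]", "nested": {"ids": "[1, 2, 3]"}}
d.replace_lists(params)
print(params)  # {'layers': [64, 32], 'nested': {'ids': [1, 2, 3]}}
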
def reload_apply_save(load_file, save_file, func, func_param=None, pattern_to_remove=None):
    """Load dumped data, apply a function to each row, and save the result.

    Load the list of lists of bitcoin addresses, convert the addresses
    from strings to integers, and save the result.

    Parameters
    ----------
    load_file : str
        Name of the file containing the list of lists to load.
    save_file : str
        Name of the file in which to save the result.
    func : callable
        Function used to convert the BTC addresses to integers.
    func_param : optional
        An extra parameter passed to ``func``.
    pattern_to_remove : str, optional
        Pattern to remove from each row before applying ``func``.

    Returns
    -------
    None
        The converted addresses (lists of ints) are written to ``save_file``,
        one line per transaction.

    Example
    -------
    >>> reload_apply_save('input_addresses_str', 'input_addresses_int', rewrite, addresses_ids, ' ')

    A line such as [20, 1, 2] is written to the file 'input_addresses_int'
    for each transaction.
    """
    from ast import literal_eval as make_list

    with open(save_file, "w") as save, open(load_file, "r") as f:
        i = 0
        for line in f:
            i += 1
            if i % 10000000 == 0:
                print(i, "transactions processed...")
            row = make_list(line.strip())
            # Remove the specified pattern, if present.
            if pattern_to_remove is not None and pattern_to_remove in row:
                row.remove(pattern_to_remove)
            # Apply the conversion and write the row.
            if func_param is None:
                save.write("%s\n" % func(row))
            else:
                save.write("%s\n" % func(row, func_param))
        print("The total number of transactions processed is:", i)

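# A self-contained sketch of the call shown in the docstring above; `rewrite`
# and `addresses_ids` are not defined in this file, so hypothetical stand-ins
# are used here.
def rewrite(row, mapping):
    # Map each address string to its integer id.
    return [mapping[addr] for addr in row]

addresses_ids = {"addr_a": 20, "addr_b": 1, "addr_c": 2}

with open("input_addresses_str", "w") as demo:
    demo.write("['addr_a', 'addr_b', 'addr_c', ' ']\n")

reload_apply_save("input_addresses_str", "input_addresses_int", rewrite, addresses_ids, " ")
# input_addresses_int now contains: [20, 1, 2]
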
def graph_by_time(self, df, g):
    import networkx as nx
    from ast import literal_eval as make_list

    callsites = df["name"].unique()
    ret = nx.DiGraph()
    for callsite in callsites:
        # Each callsite row stores its call path as a string-encoded list.
        path = df.loc[df["name"] == callsite]["path"].tolist()[0]
        path = make_list(path)
        # Graph.add_path was removed in NetworkX 2.4; use the module-level helper.
        nx.add_path(ret, path)
    return ret

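# A minimal sketch of the input graph_by_time expects; the DataFrame contents
# are hypothetical, and since the method body does not use `self` or `g`,
# None is passed for both in this demo.
import pandas as pd

demo_df = pd.DataFrame({
    "name": ["main", "solve"],
    "path": ["['main']", "['main', 'solve']"],
})
demo_graph = graph_by_time(None, demo_df, None)
print(list(demo_graph.edges()))  # [('main', 'solve')]
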
def _break_cycles_in_paths(path):
    """Break cycles if present in the callpath.

    Parameters
    ----------
    path : str
        String-encoded path array.
    """
    import numpy as np
    from ast import literal_eval as make_list

    ret = []
    moduleMapper = {}
    dataMap = {}

    # TODO: see if we can remove this.
    if isinstance(path, float):
        return []

    path_list = make_list(path)

    for idx, elem in enumerate(path_list):
        # Each element has the form "module=callsite".
        callsite = elem.split("=")[1]
        module = elem.split("=")[0]

        if module not in dataMap:
            moduleMapper[module] = 0
            dataMap[module] = [{"callsite": callsite, "module": module, "level": idx}]
        else:
            flag = [p["level"] == idx for p in dataMap[module]]
            if np.any(np.array(flag)):
                # The module already appears at this level: rename it with its
                # callsite to break the cycle.
                moduleMapper[module] += 1
                dataMap[module].append(
                    {"callsite": callsite, "module": module + "=" + callsite, "level": idx}
                )
            else:
                dataMap[module].append(
                    {"callsite": callsite, "module": module, "level": idx}
                )
        ret.append(dataMap[module][-1])
    return ret

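# A usage sketch, assuming the "module=callsite" element encoding seen above;
# the module and callsite names are hypothetical.
print(_break_cycles_in_paths("['libA=main', 'libB=solve']"))
# [{'callsite': 'main', 'module': 'libA', 'level': 0},
#  {'callsite': 'solve', 'module': 'libB', 'level': 1}]
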
def create_group_path(self, path):
    from ast import literal_eval as make_list

    if isinstance(path, str):
        path = make_list(path)

    group_path = []
    prev_module = None
    for idx, callsite in enumerate(path):
        if idx == 0:
            # Assign the first callsite as from_callsite and do not push it into an array.
            from_callsite = callsite
            # from_module = self.entire_df.loc[self.entire_df['name'] == from_callsite]['module'].unique()[0]
            from_module = self.callsite_module_map[from_callsite]

            # Store the previous module to check the hierarchy later.
            prev_module = from_module

            # Create the entry-function and other-function dicts.
            if from_module not in self.entry_funcs:
                self.entry_funcs[from_module] = []
            if from_module not in self.other_funcs:
                self.other_funcs[from_module] = []

            # Push into the entry-function dict since it is the first callsite.
            self.entry_funcs[from_module].append(from_callsite)

            # Append to the group path.
            group_path.append(from_module + "=" + from_callsite)
        elif idx == len(path) - 1:
            # Final callsite in the path.
            to_callsite = callsite
            if "/" in to_callsite:
                to_callsite = to_callsite.split("/")[-1]

            to_module = self.callsite_module_map[to_callsite]

            if prev_module != to_module:
                group_path.append(to_module + "=" + to_callsite)

            if to_module not in self.entry_funcs:
                self.entry_funcs[to_module] = []
            if to_module not in self.other_funcs:
                self.other_funcs[to_module] = []

            if to_callsite not in self.other_funcs[to_module]:
                self.other_funcs[to_module].append(to_callsite)
            if to_callsite not in self.entry_funcs[to_module]:
                self.entry_funcs[to_module].append(to_callsite)
        else:
            # Assign the from and to callsites.
            from_callsite = path[idx - 1]
            if "/" in callsite:
                to_callsite = callsite.split("/")[-1]
            else:
                to_callsite = callsite

            from_module = self.callsite_module_map[from_callsite]
            to_module = self.callsite_module_map[to_callsite]

            # Create the entry-function and other-function dicts if not already present.
            if to_module not in self.entry_funcs:
                self.entry_funcs[to_module] = []
            if to_module not in self.other_funcs:
                self.other_funcs[to_module] = []

            # If the previous module is not the same as the current module.
            if to_module != prev_module:
                # TODO: Come back and check if it is in the path.
                if to_module in group_path:
                    prev_module = to_module
                else:
                    group_path.append(to_module + "=" + to_callsite)
                    prev_module = to_module

                    if to_callsite not in self.entry_funcs[to_module]:
                        self.entry_funcs[to_module].append(to_callsite)
            elif to_module == prev_module:
                to_callsite = callsite
                # to_module = self.entire_df.loc[self.entire_df['name'] == to_callsite]['module'].unique()[0]
                to_module = self.callsite_module_map[to_callsite]

                prev_module = to_module

                if to_callsite not in self.other_funcs[to_module]:
                    self.other_funcs[to_module].append(to_callsite)
    return group_path

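# A hedged sketch of the state create_group_path relies on; every name below
# (modules, callsites) is hypothetical. The method only needs a
# callsite -> module map plus the two bookkeeping dicts, so a SimpleNamespace
# stands in for `self`.
from types import SimpleNamespace

obj = SimpleNamespace(
    callsite_module_map={"main": "libA", "solve": "libB", "update": "libB"},
    entry_funcs={},
    other_funcs={},
)
print(create_group_path(obj, ["main", "solve", "update"]))
# ['libA=main', 'libB=solve']
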
def generate_sp_edge_sframe(x):
    from ast import literal_eval as make_list

    # x is a row whose 'sp_edges' column holds a string-encoded list of edges.
    return make_list(x['sp_edges'])

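# A hedged usage sketch (the row contents below are hypothetical):
print(generate_sp_edge_sframe({"sp_edges": "[(0, 1), (1, 2)]"}))  # [(0, 1), (1, 2)]
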
from ast import literal_eval as make_list

# POSTCOMMENTID, COMMENTS, ERROR_WORDS_LIST and SUGGEST_WORDS_LIST are assumed
# to be built earlier in the original script; `de` is assumed to be a
# spell-check dictionary (e.g. a pyenchant Dict) created elsewhere. The two
# *_delete_element lists are initialized here so the snippet runs.
zipped_2 = zip(POSTCOMMENTID, COMMENTS, ERROR_WORDS_LIST, SUGGEST_WORDS_LIST)

Dict_Error_Suggestion = {}
Dic_Error_Suggestion_One_Edit = {}
Dic_Error_Suggestion_Split = {}
Dic_Error_Suggestion_Repeated_Letters = {}

error_delete_element = []
suggest_delete_element = []

for i, c, e, s in zipped_2:
    c_new = str(c)
    for j in range(0, len(e)):
        if len(make_list(s[j])) == 1:
            # Exactly one suggestion: substitute it into the comment.
            c_new = str(c_new).replace(str(e[j]), str(make_list(s[j])[0]))
            error_delete_element.append(str(e[j]))
            suggest_delete_element.append(make_list(s[j]))
        elif str(e[j]).isupper():
            if de.check(str(e[j]).capitalize()) or de.check(str(e[j]).lower()):
                # The all-caps word is valid once recased, so keep it unchanged.
                c_new = str(c_new).replace(str(e[j]), str(e[j]))
                error_delete_element.append(str(e[j]))
                suggest_delete_element.append(make_list(s[j]))
        elif de.check(str(e[j]).capitalize()):
            # and (str(make_list(s[j])[0]) == str(str(e[j]).capitalize())):
            # The capitalized form is a valid word: recase it in the comment.
            c_new = str(c_new).replace(str(e[j]), str(e[j]).capitalize())
            error_delete_element.append(str(e[j]))
            suggest_delete_element.append(make_list(s[j]))

import csv
from ast import literal_eval as make_list

with open("F:/GCN-IMC-MD/code/result.txt", "r") as f:
    lines = f.readlines()

# The first two lines presumably hold string-encoded lists of FPR and TPR
# values, given the names of the output files below.
fpr = make_list(lines[0])
tpr = make_list(lines[1])

with open("fpr_model.csv", "w", newline="") as out1, \
     open("tpr_model.csv", "w", newline="") as out2:
    csv_write1 = csv.writer(out1, dialect="excel")
    csv_write2 = csv.writer(out2, dialect="excel")
    for value in fpr:
        csv_write1.writerow([str(value)])
    for value in tpr:
        csv_write2.writerow([str(value)])

# Both helpers below are aliases of ast.literal_eval; pos_patterns is assumed
# to be loaded earlier in the original script (e.g. from "pos_patterns.txt")
# in the same way as neg_patterns.
from ast import literal_eval as make_tuple
from ast import literal_eval as make_list

neg_patterns = []
for line in open("neg_patterns.txt"):
    pattern = make_tuple(line)
    neg_patterns.append(pattern)

patterns = pos_patterns + neg_patterns

pos_features = []
neg_features = []
train = []

i = 0
for line in open("features_pos.txt"):
    line = line.rstrip("\n")
    labels = make_list(line)
    # Turn the label list into an index -> label feature dict.
    features = {}
    for j in range(0, len(labels)):
        features[j] = labels[j]
    pos_features.append(features)
    print(i)
    i = i + 1
    train.append((features, 1))

i = 0
for line in open("features_neg.txt"):
    line = line.rstrip("\n")
    labels = make_list(line)

import pickle
from ast import literal_eval as make_list

with open("C:/Users/Vahid/Desktop/List_FirstName.pkl", "rb") as f:
    List_FirstName = pickle.load(f)
with open("C:/Users/Vahid/Desktop/List_LastName.pkl", "rb") as f:
    List_LastName = pickle.load(f)

# Table is assumed to come from the datascience package, and `nlp` below is
# assumed to be a loaded spaCy model (e.g. spacy.load("en_core_web_sm")).
COMMENTSTABLE = Table.read_table("C:/Users/Vahid/Desktop/IncorrectComments.csv", encoding="ISO-8859-1")
ID = COMMENTSTABLE.column(0)
Comments = COMMENTSTABLE.column(1)

Errors = []
for i in COMMENTSTABLE.column(2):
    Errors.append(make_list(i))


def spacy_ent_check(word):
    # Return the first casing variant of `word` that spaCy tags as an entity.
    docs_list = [str(word), str(word.lower()), str(word.capitalize())]
    for item in docs_list:
        doc = nlp(str(item))
        for ent in doc.ents:
            if ent.label_:
                return item


def spacy_tag_check(word):

def get_mon_info():
    import configparser
    from ast import literal_eval as make_list

    cfg = configparser.ConfigParser()
    cfg.read("config.ini")
    return make_list(cfg["Settings"]["mon_info"])

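# A hedged sketch of the config file this expects; only the section and key
# names come from the code above, the value itself is hypothetical.
#
# config.ini:
#   [Settings]
#   mon_info = [(0, 1920, 1080), (1, 2560, 1440)]
#
# get_mon_info() would then return [(0, 1920, 1080), (1, 2560, 1440)].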