def load_row_by_row(filename):
    """Lazily load a data file, parsing one Python literal per line.

    Each line is stripped and evaluated with ``ast.literal_eval``. Lines
    that cannot be parsed are reported on stdout (with their 0-based line
    index) and skipped, so one malformed line does not abort the load.

    Parameters
    ----------
    filename : str
        The file containing the data to load.

    Yields
    ------
    object
        The value parsed from one line of the file (a list when the line
        holds a list literal).
    """
    from ast import literal_eval as make_list

    with open(filename, "r") as f:
        for n, line in enumerate(f):
            # Progress report uses the 1-based count of lines read so far.
            processed = n + 1
            if processed % 10000000 == 0:
                print(processed, "lines processed")
            try:
                row = make_list(line.strip())
            # Only the failures literal_eval documents for malformed input;
            # KeyboardInterrupt / SystemExit must keep propagating.
            except (ValueError, SyntaxError, TypeError,
                    MemoryError, RecursionError):
                print("Error at line", n)
                continue
            yield row
Beispiel #2
0
 def replace_lists(self, params):
     """Convert bracketed string values of ``params`` into Python objects, in place.

     Every string value that starts with ``[`` and ends with ``]`` is
     replaced by the result of ``ast.literal_eval`` on it. Nested
     dictionaries are processed recursively. Other values (including
     empty strings) are left untouched.

     Parameters
     ----------
     params : dict
         Mapping to rewrite; it is modified in place.
     """
     from ast import literal_eval as make_list

     # Only existing keys are reassigned, so the dict size never changes
     # while it is being iterated.
     for key, val in params.items():
         # startswith/endswith are safe on "", unlike val[0] / val[-1].
         if isinstance(val, str) and val.startswith('[') and val.endswith(']'):
             params[key] = make_list(val)
         elif isinstance(val, dict):
             self.replace_lists(val)
def reload_apply_save(load_file,
                      save_file,
                      func,
                      func_param=None,
                      pattern_to_remove=None):
    """Load dumped rows, apply a function to each and save the results.

    Every line of ``load_file`` is parsed with ``ast.literal_eval``. If
    ``pattern_to_remove`` is given and present in the parsed row, its first
    occurrence is removed. ``func`` is then applied to the row and the
    result is written as one line of ``save_file``.

    Parameters
    ----------
    load_file : str
        Name of the file containing one list literal per line.
    save_file : str
        File the results are written to (overwritten if it exists).
    func : function
        Called as ``func(row)`` or, when ``func_param`` is not None,
        as ``func(row, func_param)``.
    func_param : object, optional
        Extra argument forwarded to ``func``.
    pattern_to_remove : object, optional
        Element to remove from each row before ``func`` is applied.

    Returns
    -------
    None
        The results are only written to ``save_file``; progress and the
        total number of processed lines are printed.

    Example
    -------
    >>> reload_apply_save('input_addresses_str', 'input_addresses_int',
    ...                   rewrite, addresses_ids, ' ')

    The result is written in the file 'input_addresses_int', one line
    for each transaction.
    """
    from ast import literal_eval as make_list

    # The output file is opened first, as before, so it is created/truncated
    # even when the input file turns out to be missing.
    with open(save_file, "w") as save, open(load_file, "r") as f:
        i = 0
        for i, line in enumerate(f, start=1):
            if i % 10000000 == 0:
                print(i, "Transactions processed...")
            row = make_list(line.strip())

            # Remove the pattern specified
            if pattern_to_remove is not None and pattern_to_remove in row:
                row.remove(pattern_to_remove)

            # Always convert and write the row: previously this was an
            # ``elif``, so any row containing the pattern was cleaned and
            # then silently dropped from the output.
            if func_param is None:
                save.write("%s\n" % func(row))
            else:
                save.write("%s\n" % func(row, func_param))
        print("The total number of transaction treated is:", i)
Beispiel #4
0
    def graph_by_time(self, df, g):
        """
        Build a directed graph from the call paths stored in ``df``.

        For every distinct value of ``df["name"]`` the ``"path"`` entry of
        the first matching row is parsed with ``make_list`` and added to the
        graph as a path.

        Parameters:
            df: table with at least the columns "name" and "path"
            g: not used by this method

        Returns:
            the resulting ``nx.DiGraph``
        """
        graph = nx.DiGraph()

        for name in df["name"].unique():
            matching_rows = df.loc[df["name"] == name]
            serialized_path = matching_rows["path"].tolist()[0]
            # NOTE(review): Graph.add_path is not available in recent
            # NetworkX releases -- confirm the pinned version.
            graph.add_path(make_list(serialized_path))

        return graph
Beispiel #5
0
    def _break_cycles_in_paths(path):
        """
        Expand a serialized callpath into one record per path level.

        Parameter:
            path: string holding a list literal whose entries look like
                  "module=callsite"; a float yields an empty result

        Returns:
            list of dicts with the keys "callsite", "module" and "level"
            (the position of the entry in the path). When an earlier record
            of the same module already sits on the same level, the module
            is stored as "module=callsite" instead of the bare module name.
        """
        from ast import literal_eval as make_list

        # TODO: see if we can remove this.
        if isinstance(path, float):
            return []

        records = []
        records_by_module = {}

        for level, entry in enumerate(make_list(path)):
            parts = entry.split("=")
            callsite = parts[1]
            module = parts[0]

            earlier = records_by_module.setdefault(module, [])
            same_level_seen = any(prev["level"] == level for prev in earlier)
            label = module + "=" + callsite if same_level_seen else module

            record = {"callsite": callsite, "module": label, "level": level}
            earlier.append(record)
            records.append(record)

        return records
Beispiel #6
0
    def create_group_path(self, path):
        """
        Build the module-level ("group") path for a path of callsites.

        Each callsite is mapped to its module through
        ``self.callsite_module_map``. The returned list holds
        "module=callsite" strings, appended when the path enters a module
        that differs from the previous one.

        As a side effect ``self.entry_funcs`` and ``self.other_funcs``
        (dicts mapping a module to a list of callsites) are updated.

        Parameter:
            path: sequence of callsite names, or a string holding a list
                  literal (parsed with ``make_list``)

        Returns:
            list of "module=callsite" strings

        A callsite missing from ``self.callsite_module_map`` raises KeyError.
        """
        if isinstance(path, str):
            path = make_list(path)
        group_path = []
        prev_module = None
        for idx, callsite in enumerate(path):
            # A single-element path is handled entirely by this first branch.
            if idx == 0:
                # Assign the first callsite as from_callsite and not push into an array.
                from_callsite = callsite
                # from_module = self.entire_df.loc[self.entire_df['name'] == from_callsite]['module'].unique()[0]
                from_module = self.callsite_module_map[from_callsite]

                # Store the previous module to check the hierarchy later.
                prev_module = from_module

                # Create the entry function and other functions dict.
                if from_module not in self.entry_funcs:
                    self.entry_funcs[from_module] = []
                if from_module not in self.other_funcs:
                    self.other_funcs[from_module] = []

                # Push into entry function dict since it is the first callsite.
                # (No duplicate check here, unlike the other branches.)
                self.entry_funcs[from_module].append(from_callsite)

                # Append to the group path.
                group_path.append(from_module + "=" + from_callsite)

            elif idx == len(path) - 1:
                # Final callsite in the path.
                to_callsite = callsite
                # Keep only the part after the last "/" of the callsite name.
                if "/" in to_callsite:
                    to_callsite = to_callsite.split("/")[-1]

                to_module = self.callsite_module_map[to_callsite]

                # Only extend the group path when the module changes.
                if prev_module != to_module:
                    group_path.append(to_module + "=" + to_callsite)

                if to_module not in self.entry_funcs:
                    self.entry_funcs[to_module] = []
                if to_module not in self.other_funcs:
                    self.other_funcs[to_module] = []

                # The final callsite is recorded in both dicts (once each).
                if to_callsite not in self.other_funcs[to_module]:
                    self.other_funcs[to_module].append(to_callsite)

                if to_callsite not in self.entry_funcs[to_module]:
                    self.entry_funcs[to_module].append(to_callsite)
            else:
                # Assign the from and to callsite.
                # NOTE(review): from_callsite is taken from the path as-is,
                # without the "/" stripping applied to to_callsite -- confirm.
                from_callsite = path[idx - 1]
                if "/" in callsite:
                    to_callsite = callsite.split("/")[-1]
                else:
                    to_callsite = callsite

                # from_module is not read again in this branch; the lookup
                # still raises KeyError for an unknown from_callsite.
                from_module = self.callsite_module_map[from_callsite]
                to_module = self.callsite_module_map[to_callsite]

                # Create the entry function and other function dict if not already present.
                if to_module not in self.entry_funcs:
                    self.entry_funcs[to_module] = []
                if to_module not in self.other_funcs:
                    self.other_funcs[to_module] = []

                # if previous module is not same as the current module.
                if to_module != prev_module:
                    # TODO: Come back and check if it is in the path.
                    # NOTE(review): group_path holds "module=callsite"
                    # strings, so a bare module name presumably never
                    # matches here -- verify the intended check.
                    if to_module in group_path:
                        prev_module = to_module
                    else:
                        group_path.append(to_module + "=" + to_callsite)
                        prev_module = to_module
                        if to_callsite not in self.entry_funcs[to_module]:
                            self.entry_funcs[to_module].append(to_callsite)

                elif to_module == prev_module:
                    # Same module as before: record the callsite as a
                    # non-entry function of that module.
                    # NOTE(review): this uses the unstripped callsite name
                    # (including any "/" prefix) for the lookup -- confirm.
                    to_callsite = callsite
                    # to_module = self.entire_df.loc[self.entire_df['name'] == to_callsite]['module'].unique()[0]
                    to_module = self.callsite_module_map[to_callsite]

                    prev_module = to_module

                    if to_callsite not in self.other_funcs[to_module]:
                        self.other_funcs[to_module].append(to_callsite)

        return group_path
Beispiel #7
0
 def generate_sp_edge_sframe(x):
     """Parse the serialized ``'sp_edges'`` entry of ``x`` with ``make_list``."""
     serialized_edges = x['sp_edges']
     return make_list(serialized_edges)
# Walk the comments together with their flagged words and suggestion lists.
zipped_2 = zip(POSTCOMMENTID, COMMENTS, ERROR_WORDS_LIST, SUGGEST_WORDS_LIST)

# Result tables; all start out empty.
Dict_Error_Suggestion = {}
Dic_Error_Suggestion_One_Edit = {}
Dic_Error_Suggestion_Split = {}
Dic_Error_Suggestion_Repeated_Letters = {}


for i, c, e, s in zipped_2:
    # Working copy of the comment text; replacements accumulate in it.
    c_new = str(c)

    for j in range(len(e)):
        suggestions = make_list(s[j])
        err_word = str(e[j])

        # Decide what (if anything) the flagged word is replaced with.
        replacement = None
        if len(suggestions) == 1:
            # Exactly one suggestion: take it.
            replacement = str(suggestions[0])
        elif err_word.isupper():
            # All-caps word whose capitalized or lower-case form passes the
            # dictionary check: the word is kept as written.
            if de.check(err_word.capitalize()) == True or de.check(err_word.lower()) == True:
                replacement = err_word
        elif de.check(err_word.capitalize()) == True:
            # The capitalized form passes the dictionary check: use it.
            replacement = err_word.capitalize()

        if replacement is not None:
            c_new = c_new.replace(err_word, replacement)
            error_delete_element.append(err_word)
            suggest_delete_element.append(suggestions)
Beispiel #9
0
import csv
from ast import literal_eval as make_list

# Read the two list literals stored on the first two lines of the result
# file. The handle is closed as soon as the lines are in memory.
with open("F:/GCN-IMC-MD/code/result.txt", "r") as a:
    strstr = a.readlines()
str1 = make_list(strstr[0])
str2 = make_list(strstr[1])

# Write each list to its own CSV file, one value per row. The context
# manager closes both files even if writing fails part-way.
with open('fpr_model.csv', 'w', newline='') as out1, \
        open('tpr_model.csv', 'w', newline='') as out2:
    csv_write1 = csv.writer(out1, dialect="excel")
    csv_write2 = csv.writer(out2, dialect="excel")
    for i in str1:
        csv_write1.writerow([str(i)])

    for i in str2:
        csv_write2.writerow([str(i)])
Beispiel #10
0
# Load the negative patterns, one literal per line. The file is opened in a
# ``with`` block so the handle is closed once the loop is done.
neg_patterns = []
with open("neg_patterns.txt") as neg_pattern_file:
	for line in neg_pattern_file:
		pattern = make_tuple(line)
		neg_patterns.append(pattern)

patterns = pos_patterns + neg_patterns

pos_features = []
neg_features = []

train = []

# Positive examples: every line holds a list of labels, which is turned into
# a {position: label} feature dict and added to the training set with class 1.
i = 0
with open("features_pos.txt") as pos_feature_file:
	for line in pos_feature_file:
		line = line.rstrip("\n")
		labels = make_list(line)

		features = dict(enumerate(labels))

		pos_features.append(features)
		print(i)
		i = i + 1
		train.append((features, 1))

# Negative examples: only the labels of each line are parsed here.
i = 0
with open("features_neg.txt") as neg_feature_file:
	for line in neg_feature_file:
		line = line.rstrip("\n")
		labels = make_list(line)
# Unpickle the first-name list.
with open('C:/Users/Vahid/Desktop/List_FirstName.pkl', 'rb') as f:
    List_FirstName = pickle.load(f)

# Unpickle the last-name list.
with open('C:/Users/Vahid/Desktop/List_LastName.pkl', 'rb') as f:
    List_LastName = pickle.load(f)


# Table of comments; columns 0, 1 and 2 are read below.
COMMENTSTABLE = Table.read_table("C:/Users/Vahid/Desktop/IncorrectComments.csv", encoding="ISO-8859-1")

ID = COMMENTSTABLE.column(0)
Comments = COMMENTSTABLE.column(1)
# Column 2 stores serialized literals; parse every cell with make_list.
Errors = [make_list(cell) for cell in COMMENTSTABLE.column(2)]


def spacy_ent_check(word):
    """Return the first casing variant of ``word`` that yields a labelled entity.

    The word is tried as given, lower-cased and capitalized, in that order.
    Each variant is passed to ``nlp`` (presumably a spaCy pipeline) and the
    first variant for which any entity in ``.ents`` has a non-empty
    ``label_`` is returned. ``None`` is returned when no variant qualifies.
    """
    variants = (str(word), str(word.lower()), str(word.capitalize()))

    for variant in variants:
        if any(ent.label_ for ent in nlp(variant).ents):
            return variant
    return None

def spacy_tag_check(word):
def get_mon_info():
    """Return the ``mon_info`` setting from ``config.ini`` as a Python object.

    The value stored under ``[Settings]`` / ``mon_info`` is parsed with
    ``ast.literal_eval``. ``config.ini`` is looked up relative to the
    current working directory; a missing file or section raises KeyError.
    """
    from ast import literal_eval

    parser = configparser.ConfigParser()
    parser.read("config.ini")
    raw_value = parser["Settings"]["mon_info"]
    return literal_eval(raw_value)