def __init__(self, **kwargs):
    """Type descriptor for a dict: flagged valid, with key/value types unset."""
    O.__init__(self, **kwargs)
    self.name = "dict"
    # Both flags identify this descriptor as a usable dict type.
    self.is_valid = self.is_dict = True
    # Key and value element types are filled in later, if known.
    self.key_type = self.val_type = None
def __init__(self, variable_visitor, file_to_trace, ignores=None, **kwargs):
    """
    Set up a line tracer for a single file.

    :param variable_visitor: visitor used to inspect variables at each line
    :param file_to_trace: path of the file whose lines are traced
    :param ignores: optional list of things to skip while tracing
    """
    self.variable_visitor = variable_visitor
    self.file_to_trace = file_to_trace
    # FIX: the default was a mutable list literal (ignores=[]), which is
    # shared across every instance constructed with the default. Use None
    # as sentinel and create a fresh list per instance.
    self.ignores = [] if ignores is None else ignores
    self.prev_line_no_map = {}   # maps to the previously seen line number
    self.lines_seen = set()      # line numbers already visited
    O.__init__(self, **kwargs)
def __init__(self, predicted=None, actual=None):
    """
    Compute weighted accuracy/precision/recall/F1 via scikit-learn when
    both label vectors are supplied; otherwise leave the scores unset.
    """
    O.__init__(self)
    # Guard clause: nothing to score unless both vectors are present.
    if predicted is None or actual is None:
        return
    self.accuracy = sk_metrics.accuracy_score(actual, predicted)
    self.precision = sk_metrics.precision_score(actual, predicted, average='weighted')
    self.recall = sk_metrics.recall_score(actual, predicted, average='weighted')
    self.f_score = sk_metrics.f1_score(actual, predicted, average='weighted')
def __init__(self, graph, permitted="conferences", ignores=None):
    """
    Text miner over a graph of documents.

    :param graph: the document graph to mine
    :param permitted: which document category to include (default "conferences")
    :param ignores: optional set of items to exclude
    """
    O.__init__(self, graph=graph)
    self.vectorizer = None
    self.doc_2_vec = None
    self.documents = None
    self.permitted = permitted
    # FIX: the default was a mutable set literal (ignores=set()), shared
    # across all instances using the default. Use None as sentinel and
    # build a fresh set per instance.
    self.ignores = set() if ignores is None else ignores
def __init__(self):
    """
    Container for a citation graph's tables (all filled in later):
      * paper_nodes / author_nodes -- node tables
      * author_edges  -- directed edges between author and paper
      * cite_edges    -- directed edges between reference paper and base paper
      * collaborator_edges -- weighted undirected edges between authors
    """
    O.__init__(self)
    self.paper_nodes = None
    self.author_nodes = None
    self.author_edges = None
    self.cite_edges = None
    self.collaborator_edges = None
def __init__(self, **kwargs):
    """Record for one statement: source location, owning method, cached AST."""
    # Location/identity fields are unset until populated (possibly via kwargs).
    self.file_source = self.method_name = None
    self.start_pos = self.end_pos = None
    self._ast = None        # parsed AST for this statement, cached
    self.is_return = False  # True when the statement is a `return`
    O.__init__(self, **kwargs)
def __init__(self, functions, distance_function=execution_distance, **kwargs):
    """
    Cluster the given functions by similarity.

    :param functions: functions to cluster
    :param distance_function: pairwise distance measure (defaults to
        execution_distance)
    """
    self.functions = functions
    self.distance_function = distance_function
    # Union-find seeded with every function in its own singleton cluster.
    self.union_find = uf.UnionFind(functions)
    O.__init__(self, **kwargs)
def __init__(self):
    """
    Root of the tree together with the number of children under it.
    :return:
    """
    O.__init__(self)
    self.n = 0                     # count of children beneath the root
    self.left = self.right = None  # child links, unset initially
def __init__(self, id, parent=None, node_type='o'):
    """
    Tree node with parent link and type tag; nodes tagged 'g' additionally
    carry g_u/g_d bounds (presumably up/down group bounds -- confirm).
    """
    O.__init__(self)
    self.id = id
    self.parent = parent
    self.node_type = node_type
    self.children = []
    if node_type == 'g':
        self.g_u, self.g_d = 1, 0
def __init__(self):
    """Tree model container: root node plus bookkeeping lists, all empty."""
    O.__init__(self)
    self.root = None
    # Distinct empty lists for each bookkeeping collection.
    self.features, self.groups, self.leaves = [], [], []
    self.con, self.cost = [], []
    self.featureNum = 0
def __init__(self, model, settings):
    """
    Initialize an algorithm.
    :param model: model the algorithm operates on
    :param settings: configuration settings for the run
    """
    O.__init__(self)
    self.model = model
    self.settings = settings
def __init__(self, **kwargs):
    """Comparison record: similarity score, mismatch counts, and size deltas."""
    self.sim_score = None
    self.n_mismatched = 0
    # Size deltas between the two compared values, unset until computed.
    self.size_diff = self.row_diff = self.col_diff = None
    # Emptiness counters: value-1 only, value-2 only, and both empty.
    self.n_val1_empty = self.n_val2_empty = self.n_both_empty = 0
    O.__init__(self, **kwargs)
def __init__(self, name, scope, var_type, positions, **kwargs):
    """
    Track one variable: identity (name, scope, declared type) plus
    bookkeeping of positions where it is stored/updated.
    """
    self.name = name
    self.scope = scope
    self.var_type = var_type
    self.positions = positions
    # Inferred type and last observed value start unset.
    self.type = self._prev_value = None
    self._is_type_set = False
    # Distinct sets: positions where the variable was stored vs. updated.
    self._store_positions = set()
    self._updated_positions = set()
    O.__init__(self, **kwargs)
def __init__(self, **kwargs):
    """Paper record; decision fields default to "reject" until overridden."""
    # Bibliographic fields, unset until populated (possibly via kwargs).
    self.title = self.keywords = self.abstract = self.category = None
    self.conference = self.year = self.authors = None
    self.decision = "reject"
    self.raw_decision = "reject"
    O.__init__(self, **kwargs)
def __init__(self, documents):
    """
    Index documents by id and group their ids by agency,
    preserving first-seen ordering in both maps.
    """
    O.__init__(self)
    document_map = OrderedDict()
    agency_map = OrderedDict()
    for document in documents:
        document_map[document.id] = document
        for agency in document.agencies:
            # setdefault inserts the agency on first sight, then appends.
            agency_map.setdefault(agency, []).append(document.id)
    self.agency_map = agency_map
    self.document_map = document_map
    self.vectorizer = None
def __init__(self, outputs_json=None, **kwargs):
    """
    Collect per-invocation results parsed from a list of output records.

    :param outputs_json: optional iterable of dicts, each possibly holding
        "return", "errorMessage", and "duration" keys
    """
    O.__init__(self, **kwargs)
    self.returns = []
    self.errors = []
    self.durations = []
    if outputs_json is not None:
        for output_json in outputs_json:
            # dict.get defaults to None, replacing the verbose
            # `x[k] if k in x else None` pattern (one lookup instead of two).
            self.returns.append(output_json.get("return"))
            self.errors.append(output_json.get("errorMessage"))
            self.durations.append(output_json.get("duration"))
def __init__(self, **kwargs):
    """
    Parsed function/method record: location, signature parts, and lazily
    derived statement groupings.
    """
    # Identity and location fields start unset.
    self.file_source = self.name = self.return_type = None
    self.start_pos = self.end_pos = self.args = None
    self.statement_blocks = []         # [<Statements>]
    self._statement_groups = None      # [[<Statements>], [<Statements>]]
    self._ast = self._scope = None
    self._prerequisite_statements = []
    O.__init__(self, **kwargs)
def build_graph(index, train_x, train_y, cite_map, use_references=True, from_cache=True):
    """
    Build (or load from cache) the token co-occurrence network for fold `index`.

    :param index: fold number, used in cache/vocabulary file names
    :param train_x: training documents (each with .id and .raw)
    :param train_y: training labels, aligned with train_x
    :param cite_map: maps a document id to the ids of documents it references
    :param use_references: also add edges between a document and its references
    :param from_cache: return the pickled network if it already exists
    :return: O(doc_map=..., edges=...) word network
    """
    if use_references:
        cached = "cache/graphs/%d_ref.pkl" % index
    else:
        cached = "cache/graphs/%d.pkl" % index
    if os.path.isfile(cached) and from_cache:
        # FIX: the pickle is written below in binary mode ("wb"), so it must
        # also be read in binary mode; text-mode unpickling fails on Python 3.
        with open(cached, "rb") as f:
            return cPkl.load(f)
    vocab_file = 'cache/vocabulary/%d.pkl' % index
    vocabulary, reverse_vocabulary = construct_vocabulary(train_x, vocab_file)
    vocabulary_words = set(vocabulary.keys())
    analyze = predict.analyzer()
    doc_map = {}
    for x, y in zip(train_x, train_y):
        # Keep only tokens that are part of the fold vocabulary.
        tokens = set(analyze(x.raw)).intersection(vocabulary_words)
        doc = Doc(x.id, tokens, y)
        doc_map[x.id] = doc
    edges = np.zeros((VOCAB_SIZE, VOCAB_SIZE), dtype=np.int16)
    for i, x in enumerate(train_x):
        if i % 1000 == 0:
            print(i)  # progress indicator for long runs
        tokens = list(doc_map[x.id].tokens)
        make_self_edges(tokens, edges, vocabulary)
        if use_references:
            references = cite_map.get(x.id, [])
            for reference in references:
                if reference not in doc_map:
                    # belongs to test set
                    continue
                make_edges(tokens, list(doc_map[reference].tokens), edges, vocabulary)
    word_network = O(doc_map=doc_map, edges=edges)
    with open(cached, "wb") as f:
        cPkl.dump(word_network, f, cPkl.HIGHEST_PROTOCOL)
    return word_network
def make_name_db():
    """
    Build a name -> O(name, males, females) map from the CSV files under
    data/us_names (rows: name,gender,count), pickle it, and return it.
    """
    root_folder = "data/us_names"
    pkl_file = US_GENDER_FILE
    db = {}
    for f_name in os.listdir(root_folder):
        f_name = "%s/%s" % (root_folder, f_name)
        with open(f_name) as f:
            print(f_name)  # progress: one line per input file
            for line in f:
                name, gender, count = line.split(",")
                name = name.lower()
                node = db.get(name)
                if node is None:
                    node = O()
                    node.name = name
                    node.females = 0
                    node.males = 0
                if gender == 'F':
                    node.females += int(count)
                elif gender == 'M':
                    node.males += int(count)
                db[name] = node
    with open(pkl_file, "wb") as f:
        pkl.dump(db, f, pkl.HIGHEST_PROTOCOL)
    return db
def make_indian_name_db():
    """
    Build a name -> O(name, males, females) map from data/ind_names.txt
    (whitespace-separated columns: ?, ?, males, females, name), pickle it
    to INDIAN_GENDER_FILE, and return it.
    """
    def split(l):
        # Columns 2 and 3 are the male/female counts; column 4 is the name.
        splits = l.split()
        return int(splits[2]), int(splits[3]), splits[4].lower()

    inp_file = "data/ind_names.txt"
    db = {}
    with open(inp_file) as f:
        index = 0
        for line in f.readlines():
            index += 1
            if index % 1000 == 0:
                # FIX: print("Line : %d", index) printed the raw format
                # string and the number as two separate values; apply the
                # % formatting instead.
                print("Line : %d" % index)
            males, females, name = split(line)
            node = db.get(name, None)
            if node is None:
                node = O()
                node.name = name
                node.females = 0
                node.males = 0
            node.females += females
            node.males += males
            db[name] = node
    pkl_file = INDIAN_GENDER_FILE
    with open(pkl_file, "wb") as f:
        pkl.dump(db, f, pkl.HIGHEST_PROTOCOL)
    return db
def make_us_states_name_db():
    """
    Build a name -> O(name, males, females) map from the per-state CSV
    files under data/us_states (gender in column 1, name in column 3),
    counting one occurrence per row; pickle and return it.
    """
    def split(l):
        splits = l.split(",")
        return splits[1], splits[3].lower()

    root_folder = "data/us_states"
    pkl_file = US_STATES_GENDER_FILE
    db = {}
    for f_name in os.listdir(root_folder):
        f_name = "%s/%s" % (root_folder, f_name)
        with open(f_name) as f:
            print(f_name)  # progress: one line per input file
            for line in f:
                gender, name = split(line)
                node = db.get(name)
                if node is None:
                    node = O()
                    node.name = name
                    node.females = 0
                    node.males = 0
                if gender == 'F':
                    node.females += 1
                elif gender == 'M':
                    node.males += 1
                db[name] = node
    with open(pkl_file, "wb") as f:
        pkl.dump(db, f, pkl.HIGHEST_PROTOCOL)
    return db
def __init__(self, name, problem):
    """
    Base class algorithm.
    :param name: Name of the algorithm
    :param problem: Instance of the problem
    """
    O.__init__(self)
    self.name = name
    self.problem = problem
    self.stat = Stat(problem, self)
    # Operator hooks, installed later by subclasses or configuration.
    self.select = self.evolve = self.recombine = None
    self._reference = None
    self.is_pareto = True
    self.gen = 0  # current generation counter
def avg_score(metrics_arr):
    """
    Summarize a list of Metrics into one Metrics whose fields hold
    O(median=..., iqr=...) aggregates of each measure.
    """
    accuracies = [m.accuracy for m in metrics_arr]
    precisions = [m.precision for m in metrics_arr]
    recalls = [m.recall for m in metrics_arr]
    f_scores = [m.f_score for m in metrics_arr]
    score = Metrics()
    score.accuracy = O(median=Metrics.median(accuracies), iqr=Metrics.iqr(accuracies))
    score.precision = O(median=Metrics.median(precisions), iqr=Metrics.iqr(precisions))
    score.recall = O(median=Metrics.median(recalls), iqr=Metrics.iqr(recalls))
    score.f_score = O(median=Metrics.median(f_scores), iqr=Metrics.iqr(f_scores))
    return score
def avg_score(metrics_arr):
    """
    Summarize a list of Metrics into an O whose fields hold
    O(median=..., iqr=...) aggregates, including specificity and the
    fraction of pre-rejects that were missed.
    """
    accuracies = [m.accuracy for m in metrics_arr]
    precisions = [m.precision for m in metrics_arr]
    recalls = [m.recall for m in metrics_arr]
    f_scores = [m.f_score for m in metrics_arr]
    specificities = [m.specificity for m in metrics_arr]
    # EPS in the denominator guards against division by zero pre-rejects.
    pre_reject_misseds = [m.pre_reject_missed / (m.pre_reject + m.EPS)
                          for m in metrics_arr]
    score = O()
    score.accuracy = O(median=Metrics.median(accuracies), iqr=Metrics.iqr(accuracies))
    score.precision = O(median=Metrics.median(precisions), iqr=Metrics.iqr(precisions))
    score.recall = O(median=Metrics.median(recalls), iqr=Metrics.iqr(recalls))
    score.f_score = O(median=Metrics.median(f_scores), iqr=Metrics.iqr(f_scores))
    score.specificity = O(median=Metrics.median(specificities), iqr=Metrics.iqr(specificities))
    score.pre_reject_missed = O(median=Metrics.median(pre_reject_misseds),
                                iqr=Metrics.iqr(pre_reject_misseds))
    return score
def __init__(self, **kwargs):
    """
    Record for one extracted function; each instance receives a unique
    auto-incremented id from the class-level counter.
    """
    Function._id += 1
    self.id = Function._id
    self.name = None
    self.body = None
    self.dataset = None
    self.package = None
    self.className = None
    # FIX: `self.source` was initialized twice in the original (once here
    # and once again under "Meta-info"); the duplicate is removed.
    self.source = None
    self.lines_touched = None
    self.span = None
    self.input_key = None
    self.return_attribute = None
    self.outputs = None
    # Meta-info
    self.useful = None
    self.is_cloned = False
    self.base_name = None
    O.__init__(self, **kwargs)
def __init__(self, predicted, actual, positive, negative, raw_decisions):
    """
    Confusion-matrix based metrics for one prediction run, plus counters
    for pre-rejected items (and pre-rejects the model would have accepted).
    EPS in each denominator guards against division by zero.
    """
    O.__init__(self)
    self.tp = self.fp = self.fn = self.tn = 0
    self.pre_reject = self.pre_reject_missed = 0
    for i, (p, a) in enumerate(zip(predicted, actual)):
        # NOTE: condition order matters -- anything not matching the first
        # three cases (including labels outside {positive, negative}) lands
        # in tn, exactly as in the original chain.
        if p == positive and a == positive:
            self.tp += 1
        elif p == positive and a == negative:
            self.fp += 1
        elif p == negative and a == positive:
            self.fn += 1
        else:
            self.tn += 1
        decision = raw_decisions[i]
        if decision == PRE_REJECT and p == positive:
            self.pre_reject_missed += 1
        elif decision == PRE_REJECT:
            self.pre_reject += 1
    eps = Metrics.EPS
    self.accuracy = (self.tp + self.tn) / len(predicted)
    self.precision = self.tp / (self.tp + self.fp + eps)
    self.recall = self.tp / (self.tp + self.fn + eps)
    self.specificity = self.tn / (self.tn + self.fp + eps)
    self.f_score = 2 * self.precision * self.recall / (self.precision + self.recall + eps)
def __init__(self, predicted, actual, positive, negative, raw_decisions):
    """
    Confusion-matrix based metrics for one prediction run, plus counters
    for pre-rejected items (and pre-rejects the model would have accepted).
    EPS in each denominator guards against division by zero.
    """
    O.__init__(self)
    self.tp = self.fp = self.fn = self.tn = 0
    self.pre_reject = self.pre_reject_missed = 0
    for i, (p, a) in enumerate(zip(predicted, actual)):
        # NOTE: condition order matters -- anything not matching the first
        # three cases (including labels outside {positive, negative}) lands
        # in tn, exactly as in the original chain.
        if p == positive and a == positive:
            self.tp += 1
        elif p == positive and a == negative:
            self.fp += 1
        elif p == negative and a == positive:
            self.fn += 1
        else:
            self.tn += 1
        decision = raw_decisions[i]
        if decision == PRE_REJECT and p == positive:
            self.pre_reject_missed += 1
        elif decision == PRE_REJECT:
            self.pre_reject += 1
    eps = Metrics.EPS
    self.accuracy = (self.tp + self.tn) / len(predicted)
    self.precision = self.tp / (self.tp + self.fp + eps)
    self.recall = self.tp / (self.tp + self.fn + eps)
    self.specificity = self.tn / (self.tn + self.fp + eps)
    self.f_score = 2 * self.precision * self.recall / (self.precision + self.recall + eps)
def get_venues():
    """
    Load the venues table into an OrderedDict keyed by venue id (as str).
    Columns by position: id, acronym, name, impact, is_conference flag.
    """
    db = DB.get()
    cur = db.cursor()
    cur.execute('SELECT * FROM venues')
    venues = OrderedDict()
    for row in cur.fetchall():
        venue = O()
        venue.id = str(row[0])
        venue.acronym = row[1]
        venue.name = row[2]
        venue.impact = int(row[3])
        # Column 4 stores 1 for conferences; compare directly to get a bool.
        venue.is_conference = (row[4] == 1)
        venues[venue.id] = venue
    DB.close()
    return venues
def vectorize(papers, iterations=ITERATIONS):
    """
    Run each paper's text through the trained LDA model; fall back to the
    title when the abstract is missing or the literal string 'None'.
    Attaches `transformed` and `doc_2_vec` to every paper in place and
    returns the model bundle.
    """
    miner, graph, lda_model, vocab = get_graph_lda_data(iterations=iterations)
    docs = []
    for paper in papers:
        abstract = paper.abstract
        if abstract is not None and abstract != 'None':
            docs.append(abstract)
        else:
            docs.append(paper.title)
    doc_2_vec = miner.vectorizer.transform(docs)
    doc_2_vec_array = doc_2_vec.toarray()
    transformed = lda_model.transform(doc_2_vec_array)
    report(lda_model, vocab)
    for paper, topic_vec, count_vec in zip(papers, transformed, doc_2_vec_array):
        paper.transformed = topic_vec
        paper.doc_2_vec = count_vec
    return O(miner=miner, graph=graph, lda_model=lda_model, vocab=vocab,
             doc_2_vec=doc_2_vec)
def __init__(self, **kwargs):
    """Take the current class-level counter as this document's id, then bump it."""
    O.__init__(self, **kwargs)
    # Post-increment: the instance gets the counter's current value.
    self.id, Document.id = Document.id, Document.id + 1
def __init__(self, id, literals, literals_pos):
    """Clause-like record: an id, its literals, and their positions."""
    O.__init__(self)
    self.id = id
    self.literals = literals
    self.li_pos = literals_pos  # positions aligned with `literals`
plt.savefig(fig_name, bbox_inches='tight') plt.clf() # Settings for 10 rows and 5 columns settings_10_5 = O( fig_size=(8, 8), col_axes=[ 0.3, # col dendo left 0.81, # col dendo bottom 0.36, # col dendo width 0.15 ], # col dendo height row_axes=[ 0.0, # row dendo left 0.055, # row dendo bottom 0.23, # row dendo width 0.69 ], # row dendo height plot_axes=[ 0.10, # hm left 0.05, # hm bottom 0.7, # hm width 0.7 ], # hm height ) # Settings for 10 rows and 4 columns settings_10_4 = O( fig_size=(8, 8), col_axes=[
def __init__(self, raw=None):
    """Hold a raw text plus its (initially unset) vector and topic stats."""
    O.__init__(self)
    self.raw = raw
    # Derived representations, computed later.
    self.vector = self.topics_count = self.topics_score = None
def __init__(self, dataset, **kwargs):
    """Wrap a dataset, forwarding any extra keyword attributes to O."""
    O.__init__(self, **kwargs)
    self.dataset = dataset
def __init__(self, decisions, objectives=None):
    """A point in decision space; ids come from a class-level counter."""
    O.__init__(self)
    # Bump the shared counter and take the new value as this point's id.
    self.id = Point.id = Point.id + 1
    self.decisions = decisions
    self.objectives = objectives
def __init__(self, name, parent, **kwargs):
    """Named tree node: parent link plus empty child and dangling maps."""
    self.name, self.parent = name, parent
    # Separate dicts: realized children vs. entries not yet attached.
    self.children = {}
    self._danglings = {}
    O.__init__(self, **kwargs)