Esempio n. 1
0
 def __init__(self, **kwargs):
   """Set up an object named ``"dict"`` that is flagged valid and as a dict.

   ``kwargs`` are forwarded to ``O.__init__`` first; the attributes below are
   assigned afterwards, so these assignments are the ones applied last.
   """
   O.__init__(self, **kwargs)
   self.name = "dict"
   self.is_valid = True
   self.is_dict = True
   # Key and value types start unset.
   self.key_type = None
   self.val_type = None
Esempio n. 2
0
 def __init__(self, variable_visitor, file_to_trace, ignores=None, **kwargs):
     """Store the tracing configuration and reset the per-run bookkeeping.

     :param variable_visitor: stored as ``self.variable_visitor``.
     :param file_to_trace: stored as ``self.file_to_trace``.
     :param ignores: optional collection stored as ``self.ignores``; when
         omitted a new empty list is created for this instance.
     :param kwargs: forwarded unchanged to ``O.__init__``.
     """
     self.variable_visitor = variable_visitor
     self.file_to_trace = file_to_trace
     # A default of ``[]`` in the signature would be created once and shared by
     # every instance; build a fresh list per instance instead.
     self.ignores = [] if ignores is None else ignores
     self.prev_line_no_map = {}
     self.lines_seen = set()
     O.__init__(self, **kwargs)
Esempio n. 3
0
 def __init__(self, predicted=None, actual=None):
   """Compute accuracy, precision, recall and F1 via ``sk_metrics``.

   The scores are only computed when both ``predicted`` and ``actual`` are
   given; otherwise no metric attribute is set. Precision, recall and F1 use
   ``average='weighted'``.
   """
   O.__init__(self)
   if predicted is None or actual is None:
     return
   averaging = {'average': 'weighted'}
   self.accuracy = sk_metrics.accuracy_score(actual, predicted)
   self.precision = sk_metrics.precision_score(actual, predicted, **averaging)
   self.recall = sk_metrics.recall_score(actual, predicted, **averaging)
   self.f_score = sk_metrics.f1_score(actual, predicted, **averaging)
Esempio n. 4
0
 def __init__(self, graph, permitted="conferences", ignores=None):
   """Store the graph and the filtering options; derived state starts unset.

   :param graph: forwarded to ``O.__init__`` as the ``graph`` keyword.
   :param permitted: stored as ``self.permitted`` (default ``"conferences"``).
   :param ignores: optional collection stored as ``self.ignores``; when
       omitted a new empty set is created for this instance.
   """
   O.__init__(self, graph=graph)
   self.vectorizer = None
   self.doc_2_vec = None
   self.documents = None
   self.permitted = permitted
   # A default of ``set()`` in the signature would be created once and shared
   # by every instance; build a fresh set per instance instead.
   self.ignores = set() if ignores is None else ignores
Esempio n. 5
0
 def __init__(self):
   """Create an empty container: every node and edge collection starts as ``None``."""
   O.__init__(self)
   self.paper_nodes = None           # Paper nodes
   self.author_nodes = None          # Author nodes
   self.author_edges = None          # Directed edges between author and paper
   self.cite_edges = None            # Directed edges between reference paper and base paper
   self.collaborator_edges = None    # Weighted undirected edges between authors
Esempio n. 6
0
 def __init__(self):
     """Create an empty container: every node and edge collection starts as ``None``."""
     O.__init__(self)
     self.paper_nodes = None  # Paper nodes
     self.author_nodes = None  # Author nodes
     self.author_edges = None  # Directed edges between author and paper
     self.cite_edges = None  # Directed edges between reference paper and base paper
     self.collaborator_edges = None  # Weighted undirected edges between authors
Esempio n. 7
0
 def __init__(self, **kwargs):
     """Assign the default attribute values, then forward ``kwargs`` to ``O.__init__``.

     NOTE(review): the defaults are set before the base initialiser runs, so
     presumably ``O.__init__`` stores ``kwargs`` as attributes and callers can
     override these defaults - confirm against ``O``.
     """
     self.file_source = None
     self.method_name = None
     self.start_pos = None
     self.end_pos = None
     self._ast = None
     self.is_return = False
     # Must stay last: anything the base initialiser sets wins over the defaults above.
     O.__init__(self, **kwargs)
Esempio n. 8
0
 def __init__(self,
              functions,
              distance_function=execution_distance,
              **kwargs):
     """Store the functions and distance callable and build a union-find over them.

     :param functions: stored as ``self.functions`` and passed to ``uf.UnionFind``.
     :param distance_function: stored as ``self.distance_function``
         (defaults to ``execution_distance``).
     :param kwargs: forwarded unchanged to ``O.__init__``.
     """
     self.functions = functions
     self.distance_function = distance_function
     self.union_find = uf.UnionFind(functions)
     # Runs last, so anything the base initialiser sets wins over the values above.
     O.__init__(self, **kwargs)
 def __init__(self):
   """
   Initialise with a zero counter ``n`` and no ``left`` / ``right`` links.

   The original note describes this object as pointing to the root of the
   tree and holding the number of children under it.
   """
   O.__init__(self)
   self.n = 0
   self.left = None
   self.right = None
Esempio n. 10
0
 def __init__(self, id, parent = None, node_type = 'o'):
     """Create a node with no children.

     :param id: stored as ``self.id``.
     :param parent: stored as ``self.parent`` (default ``None``).
     :param node_type: stored as ``self.node_type`` (default ``'o'``).
     """
     O.__init__(self)
     self.id = id
     self.parent = parent
     self.node_type = node_type
     self.children = []
     if node_type == 'g':
         # Only 'g' nodes get g_u / g_d; they are not defined on other node types.
         self.g_u = 1
         self.g_d = 0
Esempio n. 11
0
 def __init__(self):
     """Create an empty structure: no root, empty collections, ``featureNum`` of 0."""
     O.__init__(self)
     self.root = None
     # Each list is a separate, initially empty collection.
     self.features = []
     self.groups = []
     self.leaves = []
     self.con = []
     self.cost = []
     self.featureNum = 0
Esempio n. 12
0
 def __init__(self):
     """
     Initialise with a zero counter ``n`` and no ``left`` / ``right`` links.

     The original note describes this object as pointing to the root of the
     tree and holding the number of children under it.
     """
     O.__init__(self)
     self.n = 0
     self.left = None
     self.right = None
Esempio n. 13
0
 def __init__(self, model, settings):
   """
   Initialize an algorithm.

   :param model: stored as ``self.model``
   :param settings: stored as ``self.settings``
   """
   O.__init__(self)
   self.model    = model
   self.settings = settings
Esempio n. 14
0
 def __init__(self, **kwargs):
     """Assign the default comparison fields, then forward ``kwargs`` to ``O.__init__``.

     Counters start at 0 and the score / diff fields start as ``None``.

     NOTE(review): the defaults are set before the base initialiser runs, so
     presumably ``O.__init__`` stores ``kwargs`` as attributes and callers can
     override these defaults - confirm against ``O``.
     """
     self.sim_score = None
     self.n_mismatched = 0
     self.size_diff = None
     self.row_diff = None
     self.col_diff = None
     self.n_val1_empty = 0
     self.n_val2_empty = 0
     self.n_both_empty = 0
     # Must stay last: anything the base initialiser sets wins over the defaults above.
     O.__init__(self, **kwargs)
Esempio n. 15
0
 def __init__(self, name, scope, var_type, positions, **kwargs):
     """Store the variable description and reset its private tracking state.

     :param name: stored as ``self.name``.
     :param scope: stored as ``self.scope``.
     :param var_type: stored as ``self.var_type``.
     :param positions: stored as ``self.positions``.
     :param kwargs: forwarded unchanged to ``O.__init__``.
     """
     self.name = name
     self.scope = scope
     self.var_type = var_type
     self.positions = positions
     # ``type`` is distinct from ``var_type`` and starts unset.
     self.type = None
     self._store_positions = set()
     self._updated_positions = set()
     self._prev_value = None
     self._is_type_set = False
     # Runs last, so anything the base initialiser sets wins over the values above.
     O.__init__(self, **kwargs)
Esempio n. 16
0
 def __init__(self, **kwargs):
     """Assign the default record fields, then forward ``kwargs`` to ``O.__init__``.

     ``decision`` and ``raw_decision`` default to ``"reject"``; every other
     field defaults to ``None``.

     NOTE(review): the defaults are set before the base initialiser runs, so
     presumably ``O.__init__`` stores ``kwargs`` as attributes and callers can
     override these defaults - confirm against ``O``.
     """
     self.title = None
     self.keywords = None
     self.abstract = None
     self.category = None
     self.decision = "reject"
     self.raw_decision = "reject"
     self.conference = None
     self.year = None
     self.authors = None
     # Must stay last: anything the base initialiser sets wins over the defaults above.
     O.__init__(self, **kwargs)
Esempio n. 17
0
 def __init__(self, **kwargs):
   """Assign the default record fields, then forward ``kwargs`` to ``O.__init__``.

   ``decision`` and ``raw_decision`` default to ``"reject"``; every other
   field defaults to ``None``.

   NOTE(review): the defaults are set before the base initialiser runs, so
   presumably ``O.__init__`` stores ``kwargs`` as attributes and callers can
   override these defaults - confirm against ``O``.
   """
   self.title = None
   self.keywords = None
   self.abstract = None
   self.category = None
   self.decision = "reject"
   self.raw_decision = "reject"
   self.conference = None
   self.year = None
   self.authors = None
   # Must stay last: anything the base initialiser sets wins over the defaults above.
   O.__init__(self, **kwargs)
Esempio n. 18
0
 def __init__(self, predicted=None, actual=None):
     """Compute accuracy, precision, recall and F1 via ``sk_metrics``.

     The scores are only computed when both ``predicted`` and ``actual`` are
     given; otherwise no metric attribute is set. Precision, recall and F1
     use ``average='weighted'``.
     """
     O.__init__(self)
     if predicted is None or actual is None:
         return
     averaging = {'average': 'weighted'}
     self.accuracy = sk_metrics.accuracy_score(actual, predicted)
     self.precision = sk_metrics.precision_score(actual, predicted, **averaging)
     self.recall = sk_metrics.recall_score(actual, predicted, **averaging)
     self.f_score = sk_metrics.f1_score(actual, predicted, **averaging)
Esempio n. 19
0
 def __init__(self, documents):
     """Index ``documents`` by id and group their ids by agency.

     ``self.document_map`` maps ``document.id`` to the document and
     ``self.agency_map`` maps each agency to the list of ids of the documents
     that list it, both in first-seen order. ``self.vectorizer`` starts unset.
     """
     O.__init__(self)
     by_id = OrderedDict()
     ids_by_agency = OrderedDict()
     for doc in documents:
         by_id[doc.id] = doc
         for agency in doc.agencies:
             # Create the agency's list on first sight, then record this document.
             ids_by_agency.setdefault(agency, []).append(doc.id)
     self.agency_map = ids_by_agency
     self.document_map = by_id
     self.vectorizer = None
Esempio n. 20
0
 def __init__(self, outputs_json=None, **kwargs):
     """Collect return values, error messages and durations from ``outputs_json``.

     For every entry the ``"return"``, ``"errorMessage"`` and ``"duration"``
     values are appended to ``self.returns``, ``self.errors`` and
     ``self.durations``; a missing key contributes ``None``. With no
     ``outputs_json`` the three lists stay empty.
     """
     O.__init__(self, **kwargs)
     self.returns = []
     self.errors = []
     self.durations = []
     if outputs_json is None:
         return

     def pick(record, key):
         # Value stored under ``key``, or None when the key is absent.
         return record[key] if key in record else None

     for record in outputs_json:
         self.returns.append(pick(record, "return"))
         self.errors.append(pick(record, "errorMessage"))
         self.durations.append(pick(record, "duration"))
Esempio n. 21
0
 def __init__(self, **kwargs):
     """Assign the default attribute values, then forward ``kwargs`` to ``O.__init__``.

     NOTE(review): the defaults are set before the base initialiser runs, so
     presumably ``O.__init__`` stores ``kwargs`` as attributes and callers can
     override these defaults - confirm against ``O``.
     """
     self.file_source = None
     self.name = None
     self.return_type = None
     self.start_pos = None
     self.end_pos = None
     self.args = None
     self.statement_blocks = []  # [<Statements>]
     self._statement_groups = None  # [[<Statements>], [<Statements>]]
     self._ast = None
     self._scope = None
     self._prerequisite_statements = []
     # Must stay last: anything the base initialiser sets wins over the defaults above.
     O.__init__(self, **kwargs)
Esempio n. 22
0
def build_graph(index, train_x, train_y, cite_map, use_references=True, from_cache=True):
  """Build the word network for fold ``index``, or load it from the pickle cache.

  The network is an ``O`` with ``doc_map`` (document id -> ``Doc`` holding the
  document's vocabulary tokens and label) and ``edges`` (a
  ``VOCAB_SIZE x VOCAB_SIZE`` int16 matrix filled by ``make_self_edges`` and,
  when ``use_references`` is set, by ``make_edges`` against cited documents).

  :param index: fold number; selects the cache and vocabulary file names.
  :param train_x: training documents (each needs ``id`` and ``raw``).
  :param train_y: labels aligned with ``train_x``.
  :param cite_map: mapping from document id to the ids it references.
  :param use_references: also add edges between a document and its references.
  :param from_cache: return the cached network when the cache file exists.
  :return: the word network; it is also written to the cache file.
  """
  if use_references:
    cached = "cache/graphs/%d_ref.pkl" % index
  else:
    cached = "cache/graphs/%d.pkl" % index
  if os.path.isfile(cached) and from_cache:
    # The cache is written in binary mode below, so it must be read in binary
    # mode too (text mode breaks on Python 3 and on Windows).
    # NOTE: unpickling executes arbitrary code; only load trusted cache files.
    with open(cached, "rb") as f:
      return cPkl.load(f)
  vocab_file = 'cache/vocabulary/%d.pkl' % index
  vocabulary, reverse_vocabulary = construct_vocabulary(train_x, vocab_file)
  vocabulary_words = set(vocabulary.keys())
  analyze = predict.analyzer()
  doc_map = {}
  for x, y in zip(train_x, train_y):
    # Keep only the tokens that are part of the vocabulary.
    tokens = set(analyze(x.raw)).intersection(vocabulary_words)
    doc = Doc(x.id, tokens, y)
    doc_map[x.id] = doc
  edges = np.zeros((VOCAB_SIZE, VOCAB_SIZE), dtype=np.int16)
  for i, x in enumerate(train_x):
    if i % 1000 == 0:
      print(i)  # progress indicator
    tokens = list(doc_map[x.id].tokens)
    make_self_edges(tokens, edges, vocabulary)
    if use_references:
      references = cite_map.get(x.id, [])
      for reference in references:
        if reference not in doc_map:  # belongs to test set
          continue
        make_edges(tokens, list(doc_map[reference].tokens), edges, vocabulary)
  word_network = O(doc_map=doc_map, edges=edges)
  with open(cached, "wb") as f:
    cPkl.dump(word_network, f, cPkl.HIGHEST_PROTOCOL)
  return word_network
Esempio n. 23
0
def make_name_db():
    """Aggregate per-name gender counts from ``data/us_names`` and pickle them.

    Every line of every file in the folder is split on commas into name,
    gender and count. Counts are summed per lower-cased name into
    ``females`` (gender ``'F'``) or ``males`` (gender ``'M'``). The resulting
    dict is written to ``US_GENDER_FILE`` and returned.
    """
    source_dir = "data/us_names"
    target = US_GENDER_FILE
    db = {}
    for entry in os.listdir(source_dir):
        path = "%s/%s" % (source_dir, entry)
        with open(path) as handle:
            print(path)
            for record in handle:
                name, gender, count = record.split(",")
                name = name.lower()
                if name in db:
                    node = db[name]
                else:
                    # First occurrence of this name: start both tallies at zero.
                    node = O()
                    node.name = name
                    node.females = 0
                    node.males = 0
                if gender == 'F':
                    node.females += int(count)
                elif gender == 'M':
                    node.males += int(count)
                db[name] = node
    with open(target, "wb") as out:
        pkl.dump(db, out, pkl.HIGHEST_PROTOCOL)
    return db
Esempio n. 24
0
def make_indian_name_db():
    """Aggregate per-name gender counts from ``data/ind_names.txt`` and pickle them.

    Each line is split on whitespace; fields 2 and 3 are the male and female
    counts and field 4 is the name (lower-cased). Counts are summed per name.
    The resulting dict is written to ``INDIAN_GENDER_FILE`` and returned.

    :return: dict mapping name -> ``O`` with ``name``, ``females``, ``males``.
    """
    def split(l):
        # -> (males, females, lower-cased name)
        splits = l.split()
        return int(splits[2]), int(splits[3]), splits[4].lower()

    inp_file = "data/ind_names.txt"
    db = {}
    with open(inp_file) as f:
        # Stream the file line by line instead of loading it with readlines().
        for index, line in enumerate(f, 1):
            if index % 1000 == 0:
                # Format with ``%``; passing ``index`` as a second print
                # argument left the "%d" placeholder unformatted.
                print("Line : %d" % index)
            males, females, name = split(line)
            node = db.get(name, None)
            if node is None:
                node = O()
                node.name = name
                node.females = 0
                node.males = 0
            node.females += females
            node.males += males
            db[name] = node
    pkl_file = INDIAN_GENDER_FILE
    with open(pkl_file, "wb") as f:
        pkl.dump(db, f, pkl.HIGHEST_PROTOCOL)
    return db
Esempio n. 25
0
def make_us_states_name_db():
    """Count name occurrences per gender across the files in ``data/us_states``.

    Every line is split on commas; field 1 is the gender and field 3 the name
    (lower-cased). Each line adds one to ``females`` (gender ``'F'``) or
    ``males`` (gender ``'M'``) for that name. The resulting dict is written to
    ``US_STATES_GENDER_FILE`` and returned.
    """
    source_dir = "data/us_states"
    target = US_STATES_GENDER_FILE
    db = {}
    for entry in os.listdir(source_dir):
        path = "%s/%s" % (source_dir, entry)
        with open(path) as handle:
            print(path)
            for record in handle:
                fields = record.split(",")
                gender, name = fields[1], fields[3].lower()
                if name in db:
                    node = db[name]
                else:
                    # First occurrence of this name: start both tallies at zero.
                    node = O()
                    node.name = name
                    node.females = 0
                    node.males = 0
                if gender == 'F':
                    node.females += 1
                elif gender == 'M':
                    node.males += 1
                db[name] = node
    with open(target, "wb") as out:
        pkl.dump(db, out, pkl.HIGHEST_PROTOCOL)
    return db
Esempio n. 26
0
 def __init__(self, name, problem):
   """
   Base class algorithm.

   :param name: Name of the algorithm
   :param problem: Instance of the problem
   """
   O.__init__(self)
   self.name = name
   self.problem = problem
   # Stat receives this instance after ``name`` and ``problem`` are set.
   self.stat = Stat(problem, self)
   # The operator hooks start unset.
   self.select = None
   self.evolve = None
   self.recombine = None
   self._reference = None
   self.is_pareto = True
   self.gen = 0
Esempio n. 27
0
 def avg_score(metrics_arr):
     """Summarise a sequence of metrics as median / IQR per score.

     Collects ``accuracy``, ``precision``, ``recall`` and ``f_score`` from each
     element of ``metrics_arr`` and returns a ``Metrics`` whose attributes of
     the same names are ``O(median=..., iqr=...)`` computed with
     ``Metrics.median`` and ``Metrics.iqr``.
     """
     fields = ("accuracy", "precision", "recall", "f_score")
     samples = {field: [] for field in fields}
     for metrics in metrics_arr:
         for field in fields:
             samples[field].append(getattr(metrics, field))
     score = Metrics()
     for field in fields:
         values = samples[field]
         summary = O(median=Metrics.median(values), iqr=Metrics.iqr(values))
         setattr(score, field, summary)
     return score
Esempio n. 28
0
 def avg_score(metrics_arr):
     """Summarise a sequence of metrics as median / IQR per score.

     Collects ``accuracy``, ``precision``, ``recall``, ``f_score`` and
     ``specificity`` from each element of ``metrics_arr``, plus the ratio
     ``pre_reject_missed / (pre_reject + EPS)``. Returns an ``O`` whose
     attributes (``pre_reject_missed`` for the ratio) are
     ``O(median=..., iqr=...)`` computed with ``Metrics.median`` and
     ``Metrics.iqr``.
     """
     direct_fields = ("accuracy", "precision", "recall", "f_score", "specificity")
     samples = {field: [] for field in direct_fields}
     missed_ratios = []
     for metrics in metrics_arr:
         for field in direct_fields:
             samples[field].append(getattr(metrics, field))
         missed_ratios.append(metrics.pre_reject_missed /
                              (metrics.pre_reject + metrics.EPS))
     samples["pre_reject_missed"] = missed_ratios
     score = O()
     for field in direct_fields + ("pre_reject_missed",):
         values = samples[field]
         summary = O(median=Metrics.median(values), iqr=Metrics.iqr(values))
         setattr(score, field, summary)
     return score
Esempio n. 29
0
 def __init__(self, **kwargs):
     """Assign a fresh id and the default fields, then forward ``kwargs`` to ``O.__init__``.

     The id comes from the class-level counter ``Function._id``, which is
     incremented before being copied to ``self.id``.

     NOTE(review): the defaults are set before the base initialiser runs, so
     presumably ``O.__init__`` stores ``kwargs`` as attributes and callers can
     override these defaults - confirm against ``O``.
     """
     Function._id += 1
     self.id = Function._id
     self.name = None
     self.body = None
     self.dataset = None
     self.package = None
     self.className = None
     self.source = None
     self.lines_touched = None
     self.span = None
     self.input_key = None
     self.return_attribute = None
     self.outputs = None
     # Meta-info
     self.useful = None
     # NOTE(review): ``source`` was already set to None above; this second
     # assignment is redundant and could be dropped.
     self.source = None
     self.is_cloned = False
     self.base_name = None
     # Must stay last: anything the base initialiser sets wins over the defaults above.
     O.__init__(self, **kwargs)
Esempio n. 30
0
 def __init__(self, predicted, actual, positive, negative, raw_decisions):
   """Tally a binary confusion matrix and derive the usual scores from it.

   Pairs of ``predicted`` / ``actual`` labels are counted into ``tp``, ``fp``,
   ``fn`` and ``tn`` (any pair that is not tp / fp / fn counts as ``tn``).
   Entries whose ``raw_decisions`` value equals ``PRE_REJECT`` are counted in
   ``pre_reject_missed`` when predicted positive and in ``pre_reject``
   otherwise. Accuracy divides by ``len(predicted)``; the other ratios add
   ``Metrics.EPS`` to the denominator.
   """
   O.__init__(self)
   self.tp, self.fp, self.fn, self.tn = 0, 0, 0, 0
   self.pre_reject, self.pre_reject_missed = 0, 0
   for idx, (guess, truth) in enumerate(zip(predicted, actual)):
     if guess == positive:
       if truth == positive:
         self.tp += 1
       elif truth == negative:
         self.fp += 1
       else:
         self.tn += 1
     elif guess == negative and truth == positive:
       self.fn += 1
     else:
       self.tn += 1
     if raw_decisions[idx] == PRE_REJECT:
       if guess == positive:
         self.pre_reject_missed += 1
       else:
         self.pre_reject += 1
   eps = Metrics.EPS
   self.accuracy = (self.tp + self.tn) / len(predicted)
   self.precision = self.tp / (self.tp + self.fp + eps)
   self.recall = self.tp / (self.tp + self.fn + eps)
   self.specificity = self.tn / (self.tn + self.fp + eps)
   self.f_score = 2 * self.precision * self.recall / (self.precision + self.recall + eps)
Esempio n. 31
0
 def __init__(self, predicted, actual, positive, negative, raw_decisions):
     """Tally a binary confusion matrix and derive the usual scores from it.

     Pairs of ``predicted`` / ``actual`` labels are counted into ``tp``,
     ``fp``, ``fn`` and ``tn`` (any pair that is not tp / fp / fn counts as
     ``tn``). Entries whose ``raw_decisions`` value equals ``PRE_REJECT`` are
     counted in ``pre_reject_missed`` when predicted positive and in
     ``pre_reject`` otherwise. Accuracy divides by ``len(predicted)``; the
     other ratios add ``Metrics.EPS`` to the denominator.
     """
     O.__init__(self)
     self.tp, self.fp, self.fn, self.tn = 0, 0, 0, 0
     self.pre_reject, self.pre_reject_missed = 0, 0
     for idx, (guess, truth) in enumerate(zip(predicted, actual)):
         if guess == positive:
             if truth == positive:
                 self.tp += 1
             elif truth == negative:
                 self.fp += 1
             else:
                 self.tn += 1
         elif guess == negative and truth == positive:
             self.fn += 1
         else:
             self.tn += 1
         if raw_decisions[idx] == PRE_REJECT:
             if guess == positive:
                 self.pre_reject_missed += 1
             else:
                 self.pre_reject += 1
     eps = Metrics.EPS
     self.accuracy = (self.tp + self.tn) / len(predicted)
     self.precision = self.tp / (self.tp + self.fp + eps)
     self.recall = self.tp / (self.tp + self.fn + eps)
     self.specificity = self.tn / (self.tn + self.fp + eps)
     self.f_score = 2 * self.precision * self.recall / (
         self.precision + self.recall + eps)
Esempio n. 32
0
def get_venues():
    """Load every row of the ``venues`` table into an ordered id -> venue map.

    Each venue is an ``O`` with ``id`` (string form of column 0), ``acronym``,
    ``name``, ``impact`` (int of column 3) and ``is_conference`` (true when
    column 4 equals 1).

    :return: ``OrderedDict`` keyed by venue id, in the order rows are returned.
    """
    db = DB.get()
    try:
        cur = db.cursor()
        cur.execute('SELECT * FROM venues')
        venues = OrderedDict()
        for row in cur.fetchall():
            venue = O()
            venue.id = str(row[0])
            venue.acronym = row[1]
            venue.name = row[2]
            venue.impact = int(row[3])
            venue.is_conference = row[4] == 1
            venues[venue.id] = venue
        return venues
    finally:
        # Release the connection even if the query or a row conversion fails.
        DB.close()
Esempio n. 33
0
def vectorize(papers, iterations=ITERATIONS):
    """Attach LDA topic vectors and raw term vectors to each paper.

    The text of a paper is its abstract, or its title when the abstract is
    ``None`` or the string ``'None'``. Texts are transformed with
    ``miner.vectorizer`` and then with ``lda_model``; each paper receives
    ``transformed`` and ``doc_2_vec``. ``report`` is called before the
    per-paper assignment.

    :return: ``O`` bundling ``miner``, ``graph``, ``lda_model``, ``vocab`` and
        the ``doc_2_vec`` matrix as returned by the vectorizer.
    """
    miner, graph, lda_model, vocab = get_graph_lda_data(iterations=iterations)
    docs = []
    for paper in papers:
        has_abstract = paper.abstract is not None and paper.abstract != 'None'
        docs.append(paper.abstract if has_abstract else paper.title)
    doc_2_vec = miner.vectorizer.transform(docs)
    dense_vectors = doc_2_vec.toarray()
    topic_vectors = lda_model.transform(dense_vectors)
    report(lda_model, vocab)
    for paper, topics, terms in zip(papers, topic_vectors, dense_vectors):
        paper.transformed = topics
        paper.doc_2_vec = terms
    bundle = O(miner=miner,
               graph=graph,
               lda_model=lda_model,
               vocab=vocab,
               doc_2_vec=doc_2_vec)
    return bundle
Esempio n. 34
0
 def __init__(self, **kwargs):
     """Forward ``kwargs`` to ``O.__init__``, then assign the next document id.

     The instance takes the current value of the class-level counter
     ``Document.id``, which is then incremented for the next instance.
     """
     O.__init__(self, **kwargs)
     # Assigned after the base initialiser, so this assignment is applied last.
     self.id = Document.id
     Document.id += 1
Esempio n. 35
0
 def __init__(self, id, literals, literals_pos):
     """Store the id, the literals and their positions.

     :param id: stored as ``self.id``.
     :param literals: stored as ``self.literals``.
     :param literals_pos: stored as ``self.li_pos`` (note the shorter attribute name).
     """
     O.__init__(self)
     self.id = id
     self.literals = literals
     self.li_pos = literals_pos
Esempio n. 36
0
    plt.savefig(fig_name, bbox_inches='tight')
    plt.clf()


# Settings for 10 rows and 5 columns.
# Each *_axes list is [left, bottom, width, height].
settings_10_5 = O(
    fig_size=(8, 8),
    # Column dendrogram ("col dendo") axes.
    col_axes=[0.3, 0.81, 0.36, 0.15],
    # Row dendrogram ("row dendo") axes.
    row_axes=[0.0, 0.055, 0.23, 0.69],
    # Heat map ("hm") axes.
    plot_axes=[0.10, 0.05, 0.7, 0.7],
)

# Settings for 10 rows and 4 columns
settings_10_4 = O(
    fig_size=(8, 8),
    col_axes=[
Esempio n. 37
0
 def __init__(self, raw=None):
   """Store ``raw`` (default ``None``); the vector and topic fields start unset."""
   O.__init__(self)
   self.raw = raw
   self.vector = None
   self.topics_count = None
   self.topics_score = None
Esempio n. 38
0
 def __init__(self, dataset, **kwargs):
     """Forward ``kwargs`` to ``O.__init__``, then store ``dataset``.

     ``self.dataset`` is assigned after the base initialiser, so this
     assignment is applied last.
     """
     O.__init__(self, **kwargs)
     self.dataset = dataset
Esempio n. 39
0
 def __init__(self, decisions, objectives=None):
   """Create a point with a fresh id.

   The class-level counter ``Point.id`` is incremented first and its new
   value becomes ``self.id``.

   :param decisions: stored as ``self.decisions``.
   :param objectives: stored as ``self.objectives`` (default ``None``).
   """
   O.__init__(self)
   Point.id += 1
   self.id = Point.id
   self.decisions = decisions
   self.objectives = objectives
Esempio n. 40
0
 def __init__(self, name, parent, **kwargs):
     """Store the name and parent and start with empty child and dangling maps.

     :param name: stored as ``self.name``.
     :param parent: stored as ``self.parent``.
     :param kwargs: forwarded unchanged to ``O.__init__``.
     """
     self.name = name
     self.parent = parent
     self.children = {}
     self._danglings = {}
     # Runs last, so anything the base initialiser sets wins over the values above.
     O.__init__(self, **kwargs)