Beispiel #1
0
def _sentence_to_link_contexts_reducer(redirects_lookup, page, contexts_acc,
                                       sentence):
    """Fold the link contexts of one sentence into an accumulator.

    The contexts for ``sentence`` come from ``_sentence_to_link_contexts``.
    When that result is non-empty it is merged into ``contexts_acc`` with
    ``_.merge_with``: an existing truthy value is combined with the incoming
    one using ``+``, otherwise the incoming value is taken as is.

    Returns ``contexts_acc``. The return value of the merge itself is
    discarded, so the accumulator is relied upon to be updated in place.
    """
    sentence_contexts = _sentence_to_link_contexts(redirects_lookup, page,
                                                   sentence)
    if _.is_empty(sentence_contexts):
        return contexts_acc

    def _append(dest, src):
        # A falsy destination (e.g. a key not seen before) is simply
        # replaced by the incoming value.
        if dest:
            return dest + src
        return src

    _.merge_with(contexts_acc, sentence_contexts, iteratee=_append)
    return contexts_acc
Beispiel #2
0
 def update_changed_test_case(self, test_case, new_commit, conf):
     """Store the documents affected by a changed test line.

     ``test_case`` must provide ``'old_test_line'`` and ``'new_test_line'``.
     Each line is parsed with ``get_test_case_from_line`` and turned into a
     document with ``get_test_cases_document``. After
     ``remove_unwanted_fields`` has been applied, the parsed test case is
     serialized to JSON (sorted keys) and its MD5 hex digest is used as the
     document key in ``CLIENT[BUCKET]``.

     * Old and new key are equal: the new document receives a "change"
       history entry, is merged with the document stored under that key
       and is upserted back.
     * Keys differ: both documents receive the history entry, are flagged
       ``changed`` and cross-linked through ``newChangedDocId`` /
       ``oldChangedDocId``; they are merged with the stored documents and
       upserted under their respective keys.

     A failing ``client.get`` is treated as "no stored document".

     :param test_case: mapping holding the old and new test line.
     :param new_commit: commit passed on to ``get_history``.
     :param conf: configuration passed on to ``get_test_cases_document``.
     """

     def _document_key(parsed_test_case):
         # hashlib.md5 needs bytes on Python 3; json.dumps emits ASCII-only
         # text by default, so encoding leaves the digest unchanged on
         # Python 2 as well.
         serialized = json.dumps(parsed_test_case, sort_keys=True)
         return hashlib.md5(serialized.encode('utf-8')).hexdigest()

     def _stored_value(key):
         # Best-effort read: any lookup error means "nothing stored".
         # ``Exception`` instead of a bare ``except`` so that
         # KeyboardInterrupt / SystemExit still propagate.
         try:
             return client.get(key).value
         except Exception:
             return None

     old_test_line = test_case['old_test_line']
     new_test_line = test_case['new_test_line']
     # The first element of the parse result is not needed here.
     _unused_class_name, parsed_old = self.get_test_case_from_line(
         old_test_line, "")
     _unused_class_name, parsed_new = self.get_test_case_from_line(
         new_test_line, "")
     old_t = self.get_test_cases_document(parsed_old, conf)
     new_t = self.get_test_cases_document(parsed_new, conf)
     # The documents are built before the fields are stripped; the keys are
     # computed from the stripped test cases.
     self.remove_unwanted_fields(parsed_old)
     self.remove_unwanted_fields(parsed_new)
     history = self.get_history(new_commit, "change")
     old_document_key = _document_key(parsed_old)
     new_document_key = _document_key(parsed_new)
     client = CLIENT[BUCKET]
     old_document = _stored_value(old_document_key)
     if old_document_key == new_document_key:
         new_t.change_history = [history]
         to_upsert = new_t.__dict__
         # Snapshot of the fresh confFile, taken before the merge touches it.
         new_conf = pydash.clone_deep(to_upsert['confFile'])
         pydash.merge_with(to_upsert, old_document,
                           TestCaseCollector._merge_dict)
         TestCaseCollector._flatten_conf(to_upsert['confFile'], new_conf)
         client.upsert(old_document_key, to_upsert)
     else:
         old_t.change_history = [history]
         new_t.change_history = [history]
         old_t.changed = True
         new_t.changed = True
         old_t.newChangedDocId = new_document_key
         new_t.oldChangedDocId = old_document_key
         old_to_upsert = old_t.__dict__
         new_to_upsert = new_t.__dict__
         if old_document:
             new_conf = pydash.clone_deep(old_to_upsert['confFile'])
             old_to_upsert = pydash.merge_with(
                 old_to_upsert, old_document, TestCaseCollector._merge_dict)
             TestCaseCollector._flatten_conf(old_to_upsert['confFile'],
                                             new_conf)
         new_document = _stored_value(new_document_key)
         if new_document:
             new_to_upsert = pydash.merge_with(
                 new_to_upsert, new_document, TestCaseCollector._merge_dict)
         new_conf = pydash.clone_deep(new_to_upsert['confFile'])
         # NOTE(review): the stored *old* document is merged into the new
         # document too (even when it is None) - presumably to carry its
         # accumulated fields over to the new key; confirm this is intended.
         new_to_upsert = pydash.merge_with(new_to_upsert, old_document,
                                           TestCaseCollector._merge_dict)
         TestCaseCollector._flatten_conf(new_to_upsert['confFile'],
                                         new_conf)
         client.upsert(old_document_key, old_to_upsert)
         client.upsert(new_document_key, new_to_upsert)
def merge_mentions(processed_pages):
  '''Combine the link contexts and entity counts of several pages.

  The ``'link_contexts'`` and the ``'entity_counts'`` entries of all pages are
  each folded into one dict with ``_.merge_with``; values that meet under the
  same key are combined with ``+`` (a falsy accumulated value is replaced by
  the incoming one).

  Returns the merged link contexts with every value replaced by a dict of the
  form ``{'link_contexts': ..., 'entity_counts': ...}``. The counts are
  looked up by direct indexing, so every key of the merged link contexts
  must also be present in the merged entity counts.
  '''
  def _combine(dest, src):
    return dest + src if dest else src

  def _merge_field(field):
    # Left fold over the pages, starting from a fresh empty dict.
    merged = {}
    for processed_page in processed_pages:
      merged = _.merge_with(merged, processed_page[field], iteratee=_combine)
    return merged

  link_contexts = _merge_field('link_contexts')
  entity_counts = _merge_field('entity_counts')

  def _with_count(val, key):
    return {'link_contexts': val,
            'entity_counts': entity_counts[key]}

  return _.map_values(link_contexts, _with_count)
Beispiel #4
0
def _apply_match_heuristic(page, link_contexts, to_match, entity):
    '''Add heuristic mentions of ``entity`` found by matching ``to_match``.

    Every offset returned by ``u.match_all(to_match, page['plaintext'])``
    becomes a candidate mention dict with the keys ``'text'``, ``'offset'``,
    ``'page_title'`` and ``'preredirect'``. Candidates for which
    ``_mention_overlaps`` reports an overlap with a mention already present
    in ``link_contexts`` are dropped.

    Returns ``link_contexts`` unchanged when no candidate survives; otherwise
    returns the result of merging the surviving mentions into
    ``link_contexts`` under ``entity``, de-duplicated by ``'offset'`` when
    the entity already has mentions.

    The values of ``link_contexts`` are expected to be lists of mentions.
    '''
    match_offsets = u.match_all(to_match, page['plaintext'])
    # Flatten all known mentions in linear time; ``sum(lists, [])`` copies
    # the growing accumulator for every entity and is quadratic.
    known_mentions = [
        mention
        for entity_mentions in link_contexts.values()
        for mention in entity_mentions
    ]
    candidates = [{
        'text': to_match,
        'offset': match_index,
        'page_title': page['title'],
        'preredirect': _.upper_first(entity)
    } for match_index in match_offsets]
    new_mentions = [
        mention for mention in candidates
        if not _mention_overlaps(known_mentions, mention)
    ]
    if _.is_empty(new_mentions):
        return link_contexts

    def _concat_unique(dest, src):
        # Only de-duplicate when the entity already has mentions; a falsy
        # destination is replaced by the new mentions as they are.
        return _.uniq_by(dest + src, 'offset') if dest else src

    return _.merge_with(link_contexts,
                        {entity: new_mentions},
                        iteratee=_concat_unique)
Beispiel #5
0
 def _merge_dict(obj_value, src_value, key, obj, source):
     """Merge callback handed to ``pydash.merge_with`` for stored documents.

     * list destination: returns ``pydash.union(obj_value, src_value)``.
     * dict destination: merged recursively with this same callback.
     * anything else (strings included): the destination value is kept.

     ``key``, ``obj`` and ``source`` are part of the callback signature but
     are not used.
     """
     if isinstance(obj_value, list):
         return pydash.union(obj_value, src_value)
     if isinstance(obj_value, dict):
         return pydash.merge_with(obj_value, src_value,
                                  TestCaseCollector._merge_dict)
     # Strings and every other type: the existing value wins.
     return obj_value
Beispiel #6
0
 def update_deleted_test_case(self, test_case, new_commit, conf):
     """Mark the stored document of a deleted test line as deleted.

     The line is parsed with ``get_test_case_from_line`` and turned into a
     document with ``get_test_cases_document``; the document gets
     ``deleted = True`` and a "delete" history entry. The document key is
     the MD5 hex digest of the parsed test case serialized to JSON (sorted
     keys) after ``remove_unwanted_fields`` has been applied.

     The stored document is then fetched, merged into the new one and the
     result is upserted. Any exception raised during that fetch / merge /
     upsert sequence is printed and otherwise ignored, so nothing is
     written when the lookup fails.

     :param test_case: the deleted test line.
     :param new_commit: commit passed on to ``get_history``.
     :param conf: configuration passed on to ``get_test_cases_document``.
     """
     # The first element of the parse result is not needed here.
     _unused_class_name, parsed_test_case = self.get_test_case_from_line(
         test_case, "")
     t = self.get_test_cases_document(parsed_test_case, conf)
     t.deleted = True
     history = self.get_history(new_commit, "delete")
     t.change_history = [history]
     self.remove_unwanted_fields(parsed_test_case)
     # hashlib.md5 needs bytes on Python 3; json.dumps emits ASCII-only text
     # by default, so encoding leaves the digest unchanged on Python 2.
     serialized = json.dumps(parsed_test_case, sort_keys=True)
     document_key = hashlib.md5(serialized.encode('utf-8')).hexdigest()
     client = CLIENT[BUCKET]
     # Deliberately broad: the whole update is best-effort and failures are
     # only reported.
     try:
         existing_document = client.get(document_key).value
         to_upsert = t.__dict__
         # Snapshot of the fresh confFile, taken before the merge touches it.
         new_conf = pydash.clone_deep(to_upsert['confFile'])
         pydash.merge_with(to_upsert, existing_document,
                           TestCaseCollector._merge_dict)
         TestCaseCollector._flatten_conf(to_upsert['confFile'], new_conf)
         client.upsert(document_key, to_upsert)
     except Exception as e:
         # Parenthesized form is valid on both Python 2 and Python 3.
         print(e)
Beispiel #7
0
def test_merge_with(case, expected):
    """Check that ``_.merge_with`` called with ``case`` yields ``expected``.

    ``case`` is unpacked into the positional arguments of the call.
    """
    actual = _.merge_with(*case)
    assert actual == expected