def test_dict1_all_fields(self):
    """bibindex termcollectors - get_field_values - complicated field

    Runs get_values_recursively over the 'all' entry of the dict1
    fixture and checks the exact list of phrases it appends.
    """
    collected = []
    get_values_recursively(self.dict1['all'], collected)
    expected = [
        'engine', 'flat tyre', 'windscreen', 'Airplane', 'x,y - plane',
        'Odin', 'Eric', 'Frank', 'Theodor', 'Richard',
    ]
    self.assertEqual(collected, expected)
Example #2
0
 def test_dict1_all_fields(self):
     """bibindex termcollectors - get_field_values - complicated field

     Feeds the 'all' entry of the dict1 fixture to
     get_values_recursively and compares the appended phrases, in
     order, with the expected list.
     """
     expected_phrases = [
         'engine', 'flat tyre', 'windscreen', 'Airplane', 'x,y - plane',
         'Odin', 'Eric', 'Frank', 'Theodor', 'Richard',
     ]
     gathered = []
     get_values_recursively(self.dict1['all'], gathered)
     self.assertEqual(gathered, expected_phrases)
Example #3
0
 def tokenize_via_recjson(self, recID):
     """
     Tokenizes journal info for a single record (uses bibfield).

     The record is fetched with get_record, the values stored under
     self.nonmarc_tag are gathered with get_values_recursively, and
     for every gathered phrase the text before its first "-" is
     returned (the whole phrase when it contains no "-").
     """
     record = get_record(recID)
     collected = []
     get_values_recursively(record.get(self.nonmarc_tag), collected)
     # Only the part preceding the first dash of each phrase is kept.
     return [phrase.split("-", 1)[0] for phrase in collected]
 def tokenize_via_recjson(self, recID):
     """
     Tokenizes for journal info; relies on bibfield.

     Reads the field named by self.nonmarc_tag from the record
     returned by get_record(recID), lets get_values_recursively
     append its values to a list, and returns a list holding the
     leading segment (up to the first "-") of each value.
     """
     field_values = []
     journal_field = get_record(recID).get(self.nonmarc_tag)
     get_values_recursively(journal_field, field_values)
     # split("-", 1)[0] yields the text before the first dash, or the
     # unchanged value when no dash is present.
     return [value.split("-", 1)[0] for value in field_values]
 def _collect_string(self, recIDs, termslist):
     """
     Collects terms from specific tags or fields.
     Used together with string tokenizer.

     For each record ID, every tag in self.tags is read from the
     record, its values are gathered with get_values_recursively and
     tokenized with the tag's entry in self.special_tags (falling
     back to self.tokenizing_function). Records that yield no terms
     leave termslist untouched; otherwise the terms are merged into
     termslist[recID] through list_union. Returns termslist.
     """
     wanted_tags = self.tags
     for recID in recIDs:
         record = get_record(recID)
         found_terms = []
         for tag in wanted_tags:
             tokenize = self.special_tags.get(tag, self.tokenizing_function)
             values = []
             get_values_recursively(record.get(tag), values)
             for value in values:
                 found_terms.extend(tokenize(value))
         if not found_terms:
             # Nothing collected for this record: do not create an entry.
             continue
         if recID not in termslist:
             termslist[recID] = []
         termslist[recID] = list_union(found_terms, termslist[recID])
     return termslist
Example #6
0
 def _collect_string(self, recIDs, termslist):
     """
     Collects terms from specific tags or fields.
     Used together with string tokenizer.

     Walks over recIDs; for each record the values of every tag in
     self.tags are gathered via get_values_recursively and passed to
     the tokenizer registered for that tag in self.special_tags, or
     to self.tokenizing_function when none is registered. When a
     record produced terms they are combined with any existing
     termslist[recID] using list_union. The updated termslist is
     returned.
     """
     tag_names = self.tags
     for recID in recIDs:
         record = get_record(recID)
         record_terms = []
         for tag in tag_names:
             tokenizer = self.special_tags.get(
                 tag, self.tokenizing_function)
             tag_values = []
             get_values_recursively(record.get(tag), tag_values)
             for tag_value in tag_values:
                 record_terms.extend(tokenizer(tag_value))
         if not record_terms:
             # No terms for this record, so termslist is left as it is.
             continue
         if recID not in termslist:
             termslist[recID] = []
         termslist[recID] = list_union(record_terms, termslist[recID])
     return termslist
 def test_dict2_all_fields(self):
     """bibindex termcollectors - get_field_values - nested field

     Runs get_values_recursively over the 'all' entry of the dict2
     fixture and checks the phrases it appends.
     """
     collected = []
     get_values_recursively(self.dict2['all'], collected)
     expected = ['name1', 'name2', 'name4']
     self.assertEqual(collected, expected)
 def test_dict1_subfield(self):
     """bibindex termcollectors - get_field_values - simple field

     Runs get_values_recursively over the 'people' entry nested under
     'all' in the dict1 fixture and checks the phrases it appends.
     """
     people_field = self.dict1['all']['people']
     collected = []
     get_values_recursively(people_field, collected)
     self.assertEqual(collected, ['Frank', 'Theodor', 'Richard'])
Example #9
0
 def test_dict2_all_fields(self):
     """bibindex termcollectors - get_field_values - nested field

     Feeds the 'all' entry of the dict2 fixture to
     get_values_recursively and compares the appended phrases with
     the expected names.
     """
     expected_names = ['name1', 'name2', 'name4']
     gathered = []
     get_values_recursively(self.dict2['all'], gathered)
     self.assertEqual(gathered, expected_names)
Example #10
0
 def test_dict1_subfield(self):
     """bibindex termcollectors - get_field_values - simple field

     Feeds only the 'people' sub-entry of dict1's 'all' entry to
     get_values_recursively and compares the appended phrases with
     the expected names.
     """
     expected_names = ['Frank', 'Theodor', 'Richard']
     gathered = []
     get_values_recursively(self.dict1['all']['people'], gathered)
     self.assertEqual(gathered, expected_names)