Esempio n. 1
0
	def _add_tal_graph(self, graph):
		'''
		Add the 'tal' source chain to `graph` and extend it via `add_tal_chain`.

		The chain passed to `graph.add_chain` consists of, in order:
		MatchingFiles (pattern `self.tal_pattern`), CurriedXMLReader
		(xpath '/auth_TAL_XML/record', limit `self.limit`), a RecordCounter
		named 'tal' (verbose `self.debug`), and `_xml_element_to_dict`.

		Returns whatever `self.add_tal_chain(graph, <chain>)` returns.
		'''
		file_finder = MatchingFiles(path='/', pattern=self.tal_pattern, fs='fs.data.aata')
		xml_reader = CurriedXMLReader(xpath='/auth_TAL_XML/record', fs='fs.data.aata', limit=self.limit)
		counter = RecordCounter(name='tal', verbose=self.debug)
		tal_records = graph.add_chain(file_finder, xml_reader, counter, _xml_element_to_dict)
		return self.add_tal_chain(graph, tal_records)
Esempio n. 2
0
	def _add_series_graph(self, graph):
		'''
		Add the 'series' source chain to `graph` and extend it via `add_series_chain`.

		The chain passed to `graph.add_chain` consists of, in order:
		MatchingFiles (pattern `self.series_pattern`), CurriedXMLReader
		(xpath '/series_XML/record', limit `self.limit`), a RecordCounter
		named 'series' (verbose `self.debug`), and `_xml_element_to_dict`.

		Returns whatever `self.add_series_chain(graph, <chain>)` returns.
		'''
		file_finder = MatchingFiles(path='/', pattern=self.series_pattern, fs='fs.data.aata')
		xml_reader = CurriedXMLReader(xpath='/series_XML/record', fs='fs.data.aata', limit=self.limit)
		counter = RecordCounter(name='series', verbose=self.debug)
		series_records = graph.add_chain(file_finder, xml_reader, counter, _xml_element_to_dict)
		return self.add_series_chain(graph, series_records)
Esempio n. 3
0
	def _add_people_graph(self, graph):
		'''
		Add the 'people' source chain to `graph` and extend it via `add_people_chain`.

		The chain passed to `graph.add_chain` consists of, in order:
		MatchingFiles (pattern `self.people_pattern`), CurriedXMLReader
		(xpath '/auth_person_XML/record', limit `self.limit`), a RecordCounter
		named 'people' (verbose `self.debug`), and `_xml_element_to_dict`.

		Returns whatever `self.add_people_chain(graph, <chain>)` returns.
		'''
		file_finder = MatchingFiles(path='/', pattern=self.people_pattern, fs='fs.data.aata')
		xml_reader = CurriedXMLReader(xpath='/auth_person_XML/record', fs='fs.data.aata', limit=self.limit)
		counter = RecordCounter(name='people', verbose=self.debug)
		people_records = graph.add_chain(file_finder, xml_reader, counter, _xml_element_to_dict)
		return self.add_people_chain(graph, people_records)
Esempio n. 4
0
	def _add_abstracts_graph(self, graph):
		'''
		Add the 'abstracts' source chain to `graph` and extend it via `add_articles_chain`.

		The chain passed to `graph.add_chain` consists of, in order:
		MatchingFiles (pattern `self.abstracts_pattern`), CurriedXMLReader
		(xpath '/AATA_XML/record', limit `self.limit`), a RecordCounter
		named 'abstracts' (verbose `self.debug`), and `_xml_element_to_dict`.

		Returns whatever `self.add_articles_chain(graph, <chain>)` returns.
		'''
		file_finder = MatchingFiles(path='/', pattern=self.abstracts_pattern, fs='fs.data.aata')
		xml_reader = CurriedXMLReader(xpath='/AATA_XML/record', fs='fs.data.aata', limit=self.limit)
		counter = RecordCounter(name='abstracts', verbose=self.debug)
		source_chain = graph.add_chain(file_finder, xml_reader, counter, _xml_element_to_dict)
		return self.add_articles_chain(graph, source_chain)
Esempio n. 5
0
 def _add_geog_graph(self, graph):
     '''
     Add the geography source chain to `graph` and extend it via `add_geog_chain`.

     The chain passed to `graph.add_chain` consists of, in order:
     MatchingFiles (pattern `self.geog_pattern`), CurriedXMLReader
     (xpath '/auth_geog_XML/record', limit `self.limit`), and
     `_xml_element_to_dict`. No RecordCounter node is included here.

     Returns whatever `self.add_geog_chain(graph, <chain>)` returns.
     '''
     nodes = (
         MatchingFiles(path='/', pattern=self.geog_pattern, fs='fs.data.aata'),
         CurriedXMLReader(xpath='/auth_geog_XML/record', fs='fs.data.aata', limit=self.limit),
         _xml_element_to_dict,
     )
     geog_records = graph.add_chain(*nodes)
     return self.add_geog_chain(graph, geog_records)
Esempio n. 6
0
 def _add_journals_graph(self, graph):
     '''
     Add the journals source chain to `graph` and extend it via `add_journals_chain`.

     The chain passed to `graph.add_chain` consists of, in order:
     MatchingFiles (pattern `self.journals_pattern`), CurriedXMLReader
     (xpath '/journal_XML/record', limit `self.limit`), and
     `_xml_element_to_dict`. No RecordCounter node is included here.

     Returns whatever `self.add_journals_chain(graph, <chain>)` returns.
     '''
     nodes = (
         MatchingFiles(path='/', pattern=self.journals_pattern, fs='fs.data.aata'),
         CurriedXMLReader(xpath='/journal_XML/record', fs='fs.data.aata', limit=self.limit),
         _xml_element_to_dict,
     )
     journal_records = graph.add_chain(*nodes)
     return self.add_journals_chain(graph, journal_records)
Esempio n. 7
0
	def _construct_graph(self, services=None):
		'''
		Construct the bonobo.Graph object for the entire pipeline.

		A single source chain is added (MatchingFiles -> CurriedCSVReader ->
		KeyManagement -> ExtractKeyedValue(key='person') -> AddPerson), and that
		chain is then passed to `add_person_or_group_chain` and
		`add_places_chain`, both with `serialize=True`.

		`services` is accepted but not used by this method. Nothing is
		returned; the finished graph is stored on `self.graph`.
		'''
		# Column names listed under 'properties' for the 'person' group.
		person_properties = (
			'star_record_no',
			'person_authority',
			'person_auth_disp',
			'variant_names',
			'type',
			'project',
			'birth_date',
			'death_date',
			'period_active',
			'century_active',
			'active_city_date',
			'nationality',
			'location',
			'address',
			'subjects_painted',
			'source',
			'medal_received',
			'text',
			'notes',
			'brief_notes',
			'working_notes',
			'bibliography',
			'ulan_id',
			'segment',
		)
		# Mapping given as 'rename_keys' for the 'person' group.
		person_renames = {
			'person_authority': 'auth_name',
			'person_auth_disp': 'auth_display_name',
			'ulan_id': 'ulan',
			'birth_date': 'birth',
			'death_date': 'death',
			'notes': 'internal_notes'
		}
		# Key order ('rename_keys' before 'properties') is kept as in the
		# original configuration.
		person_group = {
			'rename_keys': person_renames,
			'properties': person_properties,
		}
		key_operations = [{'group': {'person': person_group}}]

		graph = bonobo.Graph()
		people_records = graph.add_chain(
			MatchingFiles(
				path='/',
				pattern=self.contents_files_pattern,
				fs='fs.data.people',
			),
			CurriedCSVReader(
				fs='fs.data.people',
				limit=self.limit,
				field_names=self.contents_headers,
			),
			KeyManagement(operations=key_operations),
			# Trace(name='foo', ordinals=range(10)),  # (disabled)
			ExtractKeyedValue(key='person'),
			AddPerson(helper=self.helper),
		)

		# The return values of these calls are not needed here.
		self.add_person_or_group_chain(graph, people_records, serialize=True)
		self.add_places_chain(graph, people_records, key='places', serialize=True)

		self.graph = graph