Example #1
	def test_procurement_boundary(self):
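		# Registering the boundary check makes the factory serialize resources that
		# are separate Linked Art records as bare references rather than embedding
		# them in full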
		vocab.add_linked_art_boundary_check()
		a = model.Activity()
		p = vocab.ProvenanceEntry()
		a.caused = p
		js = factory.toJSON(a)
		# The ProvenanceEntry is its own Linked Art record, so only a reference
		# (without classifications) is embedded in the Activity
		self.assertNotIn('classified_as', js['caused'][0])
Example #2
def rewrite_output_files(r,
                         update_filename=False,
                         parallel=False,
                         concurrency=4,
                         path=None,
                         files=None,
                         **kwargs):
    print('Rewriting JSON output files')
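    # Install the Linked Art boundary and attribute assignment checks so the
    # rewritten records are serialized with the same rules as the pipeline output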
    vocab.add_linked_art_boundary_check()
    vocab.add_attribute_assignment_check()
    if not files:
        if path is None:
            path = output_file_path
        p = Path(path)
        files = p.rglob('*.json')
    files = list(files)

    if 'content_filter_re' in kwargs:
        print(f'rewriting with content filter: {kwargs["content_filter_re"]}')
    if parallel:
        pool = multiprocessing.Pool(concurrency)

        # Partition the file list roughly evenly across workers, with at most
        # 25,000 and at least 10 files per partition
        partition_size = max(min(25000, int(len(files) / concurrency)), 10)
        file_partitions = list(chunks(files, partition_size))
        args = list((file_partition, r, update_filename, i + 1,
                     len(file_partitions), kwargs)
                    for i, file_partition in enumerate(file_partitions))
        print(f'{len(args)} worker partitions with size {partition_size}')
        _ = pool.starmap(_rewrite_output_files, args)
    else:
        _rewrite_output_files(files, r, update_filename, 1, 1, kwargs)
Example #3
    def run(self, **options):
        vocab.add_linked_art_boundary_check()
        vocab.add_attribute_assignment_check()
        services = self.get_services(**options)
        super().run(services=services, **options)

        # Use the post-sale map service populated during the run to generate
        # previous/post sale links between records
        post_map = services['post_sale_map']
        self.generate_prev_post_sales_data(post_map)
Example #4
	def test_linguistic_object_boundary(self):
		vocab.add_linked_art_boundary_check()
		jrnl = vocab.JournalText(label="journal")
		issue = vocab.IssueText(label="issue")
		issue.part_of = jrnl
		issue.referred_to_by = vocab.MaterialStatement(content="Statement")

		js = factory.toJSON(issue)
		# Have not embedded journal in issue
		self.assertNotIn('classified_as', js['part_of'][0])
		# Have embedded statement in issue
		self.assertIn('content', js['referred_to_by'][0])
		self.assertIn('type', js['referred_to_by'][0]['classified_as'][0]['classified_as'][0])
Example #5
	def test_boundary_setter(self):
		vocab.add_linked_art_boundary_check()
		p = model.Person()
		p2 = model.Person()
		n = model.Name()
		n.content = "Test"
		p2.identified_by = n
		p.exact_match = p2
		# Now, Test should not appear in the resulting JSON of p
		factory.linked_art_boundaries = True
		js = factory.toJSON(p)
		self.assertNotIn('identified_by', js['exact_match'][0])
		# With boundary checking disabled, the referenced Person is embedded in full
		factory.linked_art_boundaries = False
		js = factory.toJSON(p)
		self.assertIn('identified_by', js['exact_match'][0])
Example #6
    def run(self, **options):
        vocab.add_linked_art_boundary_check()
        vocab.add_attribute_assignment_check()
        services = self.get_services(**options)
        super().run(services=services, **options)
Example #7
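	# Load the project-specific Pipeline class named by the first CLI argument,
	# then drop that argument so bonobo's own parser does not see it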
	project = sys.argv[1]
	pipe = importlib.import_module(f'pipeline.projects.{project}')
	Pipeline = pipe.Pipeline
	sys.argv = [sys.argv[0], *sys.argv[2:]]

### Run the Pipeline

if __name__ == '__main__':
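	# The record LIMIT can be overridden with the GETTY_PIPELINE_LIMIT environment
	# variable; debug runs default to a single record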
	if settings.DEBUG:
		LIMIT		= int(os.environ.get('GETTY_PIPELINE_LIMIT', 1))
		PACK_SIZE = 1
	else:
		LIMIT		= int(os.environ.get('GETTY_PIPELINE_LIMIT', 10000000))
		PACK_SIZE = 10000000

	vocab.add_linked_art_boundary_check()

	# Strip an optional bare 'dot' argument from the CLI before bonobo parses
	# the remaining options
	print_dot = False
	if 'dot' in sys.argv[1:]:
		print_dot = True
		sys.argv[1:] = [a for a in sys.argv[1:] if a != 'dot']
	parser = bonobo.get_argument_parser()
	with bonobo.parse_args(parser) as options:
		try:
			pipeline = Pipeline(
				output_path=settings.output_file_path,
				models=settings.arches_models,
				pack_size=PACK_SIZE,
				limit=LIMIT,
				debug=settings.DEBUG
			)