Ejemplo n.º 1
0
    def test_merge(self):
        '''
        Merge two person records and verify that the first one ends up with
        exactly one `referred_to_by` entry.
        '''
        first, second = self.person('Eve 1'), self.person('Eve 2')

        CromObjectMerger().merge(first, second)

        # the merge result is inspected on the first argument itself
        self.assertEqual(len(first.referred_to_by), 1)
Ejemplo n.º 2
0
	def test_pipeline_sales(self):
		'''
		When dimensions get merged, the Unknown physical dimension classification (300055642)
		gets dropped if there are any other classifications.

		A `vocab.Height` is merged with a generic `vocab.PhysicalDimension` (classified only
		as 'Unknown physical dimension'); the merged result must be classified as 'Height' alone.
		'''
		def labels(dimension):
			# set of `_label` values of every classification on the dimension
			return {c._label for c in dimension.classified_as}

		height = vocab.Height(ident='', content=9.0)
		height.unit = vocab.instances.get('inches')
		self.assertEqual(labels(height), {'Height'})

		unknown = vocab.PhysicalDimension(ident='', content=9.0)
		self.assertEqual(labels(unknown), {'Unknown physical dimension'})

		merged = CromObjectMerger().merge(height, unknown)
		self.assertEqual(labels(merged), {'Height'})
Ejemplo n.º 3
0
 def merge_objects(self, objects):
     '''
     Rewrite every entry of `objects` through a JSONValueRewriter built from
     `self.prev_post_sales_map`, re-keying entries whose rewritten 'id'
     differs from their current key.

     If the new key is already present, the stored record and the rewritten
     one are parsed, merged with CromObjectMerger, and the serialized result
     replaces the stored record. `objects` is modified in place.
     '''
     rewriter = JSONValueRewriter(self.prev_post_sales_map)
     # iterate over a snapshot of the keys because the dict is mutated below
     for key in list(objects.keys()):
         rewritten = rewriter.rewrite(objects[key])
         target = rewritten['id']
         if key == target:
             continue
         if target not in objects:
             objects[target] = rewritten
         else:
             parser = reader.Reader()
             existing = parser.read(json.dumps(objects[target]))
             incoming = parser.read(json.dumps(rewritten))
             merged = CromObjectMerger().merge(existing, incoming)
             objects[target] = json.loads(factory.toString(merged, False))
         del objects[key]
Ejemplo n.º 4
0
	def __init__(self, *args, **kwargs):
		'''
		Sets the __name__ property to include the relevant options so that when the
		bonobo graph is serialized as a GraphViz document, different objects can be
		visually differentiated.

		Also makes sure the output directory for this model (`directory`/`model`) exists
		and, when `partition_directories` is set, its 256 two-hex-digit subdirectories
		('00' through 'ff').
		'''
		# NOTE(review): `self` is passed explicitly in addition to the implicit binding
		# done by super(); confirm the base class really expects this extra argument.
		super().__init__(self, *args, **kwargs)
		self.merger = CromObjectMerger()
		self.__name__ = f'{type(self).__name__} ({self.model})'

		def ensure_dir(path):
			# create a single directory level unless it is already there
			if not os.path.exists(path):
				os.mkdir(path)

		self.dr = os.path.join(self.directory, self.model)
		# presumably ExclusiveValue serializes directory creation between workers -- TODO confirm
		with ExclusiveValue(self.dr):
			ensure_dir(self.dr)
			if self.partition_directories:
				for x in range(256):
					ensure_dir(os.path.join(self.dr, f'{x:02x}'))
Ejemplo n.º 5
0
 def __init__(self):
     '''
     Give the instance a fresh CromObjectMerger and an empty `output`
     mapping, then run the base class initializer.
     '''
     self.merger = CromObjectMerger()
     self.output = dict()
     super().__init__()
Ejemplo n.º 6
0
def _rewrite_output_files(files, r, update_filename, worker_id, total_workers,
                          kwargs):
    '''
    Rewrite one worker's partition of JSON files, merging on filename collisions.

    Each file is loaded, passed through `r.rewrite`, and written back only when
    its data or its filename changed. When `update_filename` is true the target
    path comes from `filename_for`; if that path already exists, the record
    stored there and the rewritten record are merged with CromObjectMerger and
    the merged record is written instead. A file whose name changed is removed
    after its new version has been written.

    Args:
        files: Paths of the JSON files handled by this worker.
        r: Rewriter object exposing `rewrite(data, file=...)`.
        update_filename: Whether to recompute each file's name from its
            rewritten content.
        worker_id: Identifier of this worker; only used in progress messages.
        total_workers: Number of workers; only used in progress messages.
        kwargs: Option dict. `ignore_errors` (default False) skips files that
            fail to parse or merge instead of raising. `content_filter_re`,
            when present, skips files whose raw text does not match the
            pattern. The whole dict is also forwarded to `filename_for`.

    Raises:
        json.decoder.JSONDecodeError: A file is not valid JSON and
            `ignore_errors` is false.
        Exception: Merging failed and `ignore_errors` is false.
    '''
    if not files:
        return
    print(
        f'rewrite worker partition {worker_id} called with {len(files)} files [{files[0]} .. {files[-1]}]'
    )
    began = time.time()
    rewritten_count = 0
    processed_count = 0
    ignore_errors = kwargs.get('ignore_errors', False)
    for f in files:
        processed_count += 1
        with open(f) as source:
            try:
                text = source.read()
                if ('content_filter_re' in kwargs and
                        not re.search(kwargs['content_filter_re'], text)):
                    # raw content does not match the filter: leave the file alone
                    continue
                data = json.loads(text)
            except json.decoder.JSONDecodeError:
                sys.stderr.write(
                    f'Failed to load JSON during rewriting of {f}\n')
                if not ignore_errors:
                    raise
                continue
        d = r.rewrite(data, file=f)
        newfile = (filename_for(d, original_filename=f, **kwargs)
                   if update_filename else f)
        renamed = newfile != f
        if d == data and not renamed:
            # nothing changed; do not rewrite the file
            continue
        if renamed and os.path.exists(newfile):
            # the target already holds a record: merge instead of overwriting it
            parser = reader.Reader()
            merger = CromObjectMerger()
            with open(newfile, 'r') as existing:
                content = existing.read()
                try:
                    m = parser.read(content)
                    n = parser.read(d)
                    merger.merge(m, n)
                except Exception as e:
                    print(
                        f'Exception caught while merging data from {newfile} ({str(e)}):'
                    )
                    print(d)
                    print(content)
                    if not ignore_errors:
                        raise
                    continue
                # `m` is the object that gets serialized after the merge
                d = json.loads(factory.toString(m, False))
        with open(newfile, 'w') as target:
            rewritten_count += 1
            json.dump(d, target, indent=2, ensure_ascii=False)
        if renamed:
            os.remove(f)
    elapsed = time.time() - began
    if rewritten_count:
        print(
            f'worker partition {worker_id}/{total_workers} finished with {rewritten_count}/{processed_count} files rewritten in %.1fs'
            % (elapsed, ))
    else:
        print(
            f'worker partition {worker_id}/{total_workers} finished in %.1fs' %
            (elapsed, ))
Ejemplo n.º 7
0
for id in sorted(counter):
    count = counter[id]
    if count > 1:
        files = files_by_id[id]
        for filename in files:
            with open(filename, 'r') as fh:
                content = fh.read()
                canon_file = None
                canon_content = None
                try:
                    m = read.read(content)
                    id = m.id
                    if id in seen:
                        canon_file = seen[id]
                        # 				print(f'*** {id} already seen in {canon_file} ; merging {filename}')
                        merger = CromObjectMerger()
                        with open(canon_file, 'r') as cfh:
                            canon_content = cfh.read()
                            n = read.read(canon_content)
                            try:
                                merger.merge(m, n)
                            except model.DataError as e:
                                print(
                                    f'Exception caught while merging data from {newfile} ({str(e)}):'
                                )
                                print(d)
                                print(content)
                                raise
                        merged_data = factory.toString(m, False)
                        d = json.loads(merged_data)
                        with open(canon_file, 'w') as data_file: