def test_sketch_from_sequence(sketches_json):
    """Check that sketches survive a sketch -> sequence -> sketch round trip.

    Each JSON entry is loaded into a sketch and converted to a construction
    sequence. A second sketch is then rebuilt from that sequence and
    converted again; the two sequences must have the same length and be
    equal operation by operation.

    Args:
        sketches_json: Iterable of sketch JSON objects, each passed to
            ``Sketch.from_fs_json`` (presumably a test fixture -- confirm).
    """
    for sketch_json in sketches_json:
        sketch = Sketch.from_fs_json(sketch_json)
        seq = sketch_to_sequence(sketch)
        sketch2 = sketch_from_sequence(seq)

        # Convert the *rebuilt* sketch. Converting the original sketch a
        # second time would leave sketch_from_sequence untested, because
        # its output would never reach the assertions below.
        seq2 = sketch_to_sequence(sketch2)

        assert len(seq) == len(seq2)
        for op1, op2 in zip(seq, seq2):
            assert op1 == op2
def test_sketch_from_sequence(sketches_json):
    """Verify that rebuilding a sketch from its sequence is lossless.

    Every entry is loaded with external constraints excluded and converted
    to a construction sequence. Entries whose sequence contains an
    operation labelled with one of ``_UNSUPPORTED_CONSTRAINTS`` are
    skipped. For the rest, a sketch is rebuilt from the sequence and
    converted again; both sequences must agree in length and element by
    element.

    Args:
        sketches_json: Iterable of sketch JSON objects, each passed to
            ``Sketch.from_fs_json`` (presumably a test fixture -- confirm).
    """
    for raw_sketch in sketches_json:
        original = Sketch.from_fs_json(
            raw_sketch, include_external_constraints=False)
        expected_ops = sketch_to_sequence(original)

        # Skip not supported constraints for now
        uses_unsupported = any(
            op.label in _UNSUPPORTED_CONSTRAINTS for op in expected_ops)
        if uses_unsupported:
            continue

        rebuilt = sketch_from_sequence(expected_ops)
        actual_ops = sketch_to_sequence(rebuilt)

        assert len(expected_ops) == len(actual_ops)
        for expected, actual in zip(expected_ops, actual_ops):
            assert expected == actual
def sketch_to_sequence(sketch):
    """Build the construction sequence for the given sketch.

    This delegates to ``data_sequence.sketch_to_sequence`` and returns its
    result unchanged.
    """
    construction_sequence = data_sequence.sketch_to_sequence(sketch)
    return construction_sequence
def _worker(config, processed_sequences, filter_config):
    """Convert a stream of sketches to sequences and publish them in chunks.

    Sketches obtained from ``_get_sketch_iterable(config)`` are first passed
    through ``filter_sketch``; rejected sketches are only counted. Accepted
    sketches are converted to sequences, their constraint parameters are
    normalized in place, and the results are buffered. Whenever the buffer
    holds at least ``config['chunk_size']`` sequences it is flattened and
    put on ``processed_sequences`` as the tuple::

        ((worker_idx, chunk_index), offsets, sequence_data,
         sequence_lengths, sketch_ids, num_filtered_in_chunk)

    After the input is exhausted, the remaining buffer is published the same
    way (even when it is empty), followed by a summary dict with the keys
    ``'filtered'`` (a ``Counter`` of rejection reasons) and ``'errors'``
    (the number of sketches whose conversion raised).

    Args:
        config: Mapping providing ``'worker_idx'`` and ``'chunk_size'``; it
            is also forwarded to ``_get_sketch_iterable``.
        processed_sequences: Output channel exposing ``put`` (presumably a
            multiprocessing queue -- confirm against the caller).
        filter_config: Forwarded unchanged to ``filter_sketch``.
    """
    worker_idx = config['worker_idx']
    chunk_size = config['chunk_size']
    sketches = _get_sketch_iterable(config)

    # Record layout used for the sketch identifiers of every chunk.
    sketch_id_dtype = [('document_id', 'S24'), ('part_idx', '<i4'),
                       ('sketch_idx', '<i4')]

    def publish_chunk(index, chunk_sequences, chunk_ids, filtered_count):
        # Flatten one chunk of sequences and hand it to the consumer.
        lengths = np.array(
            [len(item) for item in chunk_sequences], dtype=np.int64)
        ids = np.array(chunk_ids, dtype=sketch_id_dtype)
        offsets, sequence_data = flat_array.raw_list_flat(chunk_sequences)
        processed_sequences.put(
            ((worker_idx, index), offsets, sequence_data, lengths, ids,
             filtered_count))

    rejection_counts = collections.Counter()
    failed_conversions = 0

    pending_sequences = []
    pending_ids = []
    next_chunk = 0
    rejected_in_chunk = 0

    for sketch_id, sketch in sketches:
        verdict = filter_sketch(sketch, filter_config)
        if verdict != FilterReason.Accepted:
            rejection_counts[verdict] += 1
            rejected_in_chunk += 1
            continue

        try:
            seq = sequence.sketch_to_sequence(sketch)
            _normalize_constraint_parameters(seq)
            pending_sequences.append(seq)
            pending_ids.append(sketch_id)
        except Exception as err:
            # A single bad sketch must not abort the worker: report it,
            # count it, and keep going.
            failed_conversions += 1
            print(
                'Error processing sketch {2} in document {0} part {1}.'.format(
                    *sketch_id))
            traceback.print_exception(type(err), err, err.__traceback__)

        # Both pending lists grow together, so the sequence list length is
        # the number of items buffered for the current chunk.
        if len(pending_sequences) < chunk_size:
            continue

        publish_chunk(
            next_chunk, pending_sequences, pending_ids, rejected_in_chunk)
        pending_sequences = []
        pending_ids = []
        next_chunk += 1
        rejected_in_chunk = 0

    # Send final batch of data
    publish_chunk(
        next_chunk, pending_sequences, pending_ids, rejected_in_chunk)

    processed_sequences.put({
        'filtered': rejection_counts,
        'errors': failed_conversions
    })
def test_sequence_from_sketch(sketches_json):
    """Smoke test: every sketch converts to a sequence without raising.

    No property of the resulting sequence is asserted; the test passes as
    long as loading and conversion complete for each entry.

    Args:
        sketches_json: Iterable of sketch JSON objects, each passed to
            ``Sketch.from_fs_json`` (presumably a test fixture -- confirm).
    """
    for raw_sketch in sketches_json:
        loaded = Sketch.from_fs_json(raw_sketch)
        sketch_to_sequence(loaded)