def test_to_first_split(self):
    # Input data sitting in S3, pulled into the graph by a Docker copy app.
    s3_drop = MockS3DROP(self.get_oid('s3'), uuid.uuid4(),
                         bucket='mock',
                         key='key123',
                         profile_name='aws-profile')
    copy_from_s3 = DockerApp(self.get_oid('app'), uuid.uuid4(),
                             image='mock:latest',
                             command='copy_from_s3.sh %iDataURL0 /dfms_root/%o0',
                             user='******')
    measurement_set = DirectoryContainer(self.get_oid('dir'), uuid.uuid4(),
                                         dirname=TestChiles02._temp)

    copy_from_s3.addInput(s3_drop)
    copy_from_s3.addOutput(measurement_set)

    # For each frequency group: run casa_py on the measurement set and
    # push the result back to S3.
    outputs = []
    frequencies = make_groups_of_frequencies(FREQUENCY_GROUPS, 5)
    frequencies = frequencies[0]
    for group in frequencies:
        casa_py_drop = DockerApp(self.get_oid('app'), uuid.uuid4(),
                                 image='mock:latest',
                                 command='casa_py.sh /dfms_root/%i0 /dfms_root/%o0 {0} {1}'.format(group[0], group[1]),
                                 user='******')
        result = FileDROP(self.get_oid('file'), uuid.uuid4(),
                          dirname=TestChiles02._temp)
        copy_to_s3 = DockerApp(self.get_oid('app'), uuid.uuid4(),
                               image='mock:latest',
                               command='copy_to_s3.sh /dfms_root/%i0 %oDataURL0',
                               user='******')
        s3_drop_out = MockS3DROP(self.get_oid('s3'), uuid.uuid4(),
                                 bucket='mock',
                                 key='{0}_{1}/key123'.format(group[0], group[1]),
                                 profile_name='aws-profile')

        casa_py_drop.addInput(measurement_set)
        casa_py_drop.addOutput(result)
        copy_to_s3.addInput(result)
        copy_to_s3.addOutput(s3_drop_out)
        outputs.append(s3_drop_out)

    # Barrier waits on the measurement set and every per-group output.
    barrier_drop = BarrierAppDROP(self.get_oid('barrier'), uuid.uuid4())
    barrier_drop.addInput(measurement_set)
    for output in outputs:
        barrier_drop.addInput(output)

    # Trigger the graph by completing the input S3 drop and wait for the barrier.
    with droputils.DROPWaiterCtx(self, barrier_drop, 50000):
        s3_drop.setCompleted()
def build_graph(self):
    self._build_node_map()

    for day_to_process in self._keys:
        # Assign this day's work to a node and fetch the state carried
        # over from previous days scheduled on the same node.
        node_id = self._get_next_node(day_to_process)
        carry_over_data = self._map_carry_over_data[node_id]
        list_frequency_groups = self._work_to_do[day_to_process]
        frequency_groups = make_groups_of_frequencies(list_frequency_groups, self._parallel_streams)

        add_output_s3 = []
        if carry_over_data.drop_listobs is not None:
            add_output_s3.append(carry_over_data.drop_listobs)

        # Stage in the measurement set for this day, chained behind the
        # previous barrier on this node.
        measurement_set, properties, drop_listobs = \
            self._setup_measurement_set(
                day_to_process,
                carry_over_data.barrier_drop,
                add_output_s3,
                node_id)

        carry_over_data.drop_listobs = drop_listobs

        # Chain the splits within each group; each parallel stream
        # contributes its last element to the barrier inputs.
        outputs = []
        for group in frequency_groups:
            last_element = None
            for frequency_pairs in group:
                last_element = self._split(
                    last_element,
                    frequency_pairs,
                    measurement_set,
                    properties,
                    get_observation(day_to_process.full_tar_name),
                    node_id)
            if last_element is not None:
                outputs.append(last_element)

        # Barrier for this day; remembered so the next day on this node
        # starts only after it completes.
        barrier_drop = self.create_barrier_app(node_id)
        carry_over_data.barrier_drop = barrier_drop

        for output in outputs:
            if output is not None:
                barrier_drop.addInput(output)

    self.copy_logfiles_and_shutdown()