def test_read_sdf(self):
    """Check that read_sdf parses the bundled SDF fixture as expected.

    Reads 'test_compounds.sdf' with an InChIKey lookup file, a mol2 output
    directory under ``self.scratch`` and the SDK callback URL taken from the
    environment, then verifies the number of compounds and the content of
    the first one.
    """
    compounds = compound_parsing.read_sdf(
        'test_compounds.sdf',
        inchi_path='../data/Inchikey_IDs.json',
        mol2_file_dir=os.path.join(self.scratch, 'mol2_files'),
        callback_url=os.environ['SDK_CALLBACK_URL'])

    self.assertEqual(len(compounds), 10)

    first_compound = compounds[0]
    # The delta-G fields are not expected on compounds read from this file.
    self.assertCountEqual(first_compound.keys(),
                          comp_keys - {'deltagerr', 'deltag'})
    # Use the TestCase assertion methods throughout: unlike bare ``assert``
    # they are not stripped under ``python -O`` and report both operands.
    self.assertEqual(len(first_compound['fingerprints']), 2)
    self.assertIn('mol', first_compound)
def compound_set_from_file(self, ctx, params):
    """
    CompoundSetFromFile
    string staging_file_path
    :param params: instance of type "compoundset_upload_params" ->
       structure: parameter "workspace_name" of String, parameter
       "staging_file_path" of String, parameter "compound_set_name" of
       String
    :returns: instance of type "compoundset_upload_results" -> structure:
       parameter "report_name" of String, parameter "report_ref" of
       String, parameter "compoundset_ref" of type "obj_ref"
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN compound_set_from_file
    self._check_required_param(params, ['workspace_name',
                                        'staging_file_path',
                                        'compound_set_name'])

    # Copy the staged upload into scratch space so it can be parsed locally.
    download_info = self.dfu.download_staging_file(
        {'staging_file_subdir_path': params['staging_file_path']})
    scratch_file_path = download_info.get('copy_file_path')
    # I probably should be uploading the raw files to shock

    extension = os.path.splitext(scratch_file_path)[1]
    source_name = os.path.basename(scratch_file_path)

    # Only SDF and TSV inputs are supported; reject anything else up front.
    if extension not in ('.sdf', '.tsv'):
        raise ValueError('Invalid input file type. Expects .tsv or .sdf')
    if extension == '.sdf':
        compounds = parse.read_sdf(scratch_file_path)
    else:
        compounds = parse.read_tsv(scratch_file_path)

    set_name = params['compound_set_name']
    compoundset = {
        'id': set_name,
        'name': set_name,
        'description': 'Compound Set produced from %s' % source_name,
        'compounds': compounds,
    }

    output = self._save_to_ws_and_report(ctx, 'compound_set_from_file',
                                         params['workspace_name'],
                                         params['staging_file_path'],
                                         compoundset)
    #END compound_set_from_file

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method compound_set_from_file return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def compound_set_from_file(self, ctx, params):
    """
    CompoundSetFromFile
    string staging_file_path
    :param params: instance of type "compoundset_upload_params" ->
       structure: parameter "workspace_id" of String, parameter
       "staging_file_path" of String, parameter "compound_set_name" of
       String, parameter "mol2_staging_file_path" of String
    :returns: instance of type "compoundset_upload_results" -> structure:
       parameter "report_name" of String, parameter "report_ref" of
       String, parameter "compoundset_ref" of type "obj_ref"
    :raises ValueError: if the mol2 archive cannot be unpacked, if the
       input file is neither .sdf nor .tsv, or if the produced output is
       not a dict
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN compound_set_from_file
    self._check_param(
        params,
        ['workspace_id', 'staging_file_path', 'compound_set_name'],
        opt_param=['mol2_staging_file_path'])

    # Copy the staged compound file into scratch space for local parsing.
    scratch_file_path = self.dfu.download_staging_file({
        'staging_file_subdir_path': params['staging_file_path']
    }).get('copy_file_path')
    # I probably should be uploading the raw files to shock

    # The mol2 archive is optional; without it mol2_file_dir stays None.
    mol2_staging_file_path = params.get('mol2_staging_file_path')
    mol2_file_dir = None
    if mol2_staging_file_path:
        mol2_scratch_file_path = self.dfu.download_staging_file({
            'staging_file_subdir_path': mol2_staging_file_path
        }).get('copy_file_path')

        logging.info("start unpacking mol2 file")
        try:
            mol2_file_path_out = self.dfu.unpack_file(
                {'file_path': mol2_scratch_file_path})['file_path']
            mol2_file_dir = os.path.dirname(mol2_file_path_out)
        except Exception as err:
            # Report the archive we tried to unpack. The unpacked path is
            # not bound when unpack_file itself fails, so formatting it
            # here would mask the real problem with an UnboundLocalError.
            failed_archive = mol2_scratch_file_path or mol2_staging_file_path
            raise ValueError('Cannot unpack mol2 file: {}'.format(
                os.path.basename(failed_archive))) from err

    ext = os.path.splitext(scratch_file_path)[1]
    file_name = os.path.basename(scratch_file_path)
    if ext == '.sdf':
        compounds = parse.read_sdf(scratch_file_path,
                                   mol2_file_dir=mol2_file_dir,
                                   callback_url=self.callback_url)
    elif ext == '.tsv':
        compounds = parse.read_tsv(scratch_file_path,
                                   mol2_file_dir=mol2_file_dir,
                                   callback_url=self.callback_url)
    else:
        raise ValueError('Invalid input file type. Expects .tsv or .sdf')

    compoundset = {
        'id': params['compound_set_name'],
        'name': params['compound_set_name'],
        'description': 'Compound Set produced from %s' % file_name,
        'compounds': compounds,
    }

    output = self._save_to_ws_and_report(params['workspace_id'],
                                         params['staging_file_path'],
                                         compoundset)
    #END compound_set_from_file

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method compound_set_from_file return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def test_read_sdf():
    """Check read_sdf against the bundled 'test_compounds.sdf' fixture.

    Verifies the number of parsed compounds, that the first compound's keys
    are exactly ``comp_keys`` minus the delta-G fields, and that it carries
    two fingerprints.
    """
    parsed = compound_parsing.read_sdf('test_compounds.sdf')
    assert len(parsed) == 10

    first = parsed[0]
    expected_keys = comp_keys - {'deltagerr', 'deltag'}
    # An empty symmetric difference means the two key sets are identical.
    key_mismatch = set(first.keys()) ^ expected_keys
    assert not key_mismatch

    fingerprints = first['fingerprints']
    assert len(fingerprints) == 2