def test_read_sdf(self):
    """Parse the SDF fixture with mol2 support and check the result's shape.

    Expects ten compounds. The first compound's keys must equal
    ``comp_keys`` minus ``deltag``/``deltagerr``, it must carry two
    fingerprints, and it must include a ``mol`` entry.
    """
    compounds = compound_parsing.read_sdf(
        'test_compounds.sdf',
        inchi_path='../data/Inchikey_IDs.json',
        mol2_file_dir=os.path.join(self.scratch, 'mol2_files'),
        callback_url=os.environ['SDK_CALLBACK_URL'])
    self.assertEqual(len(compounds), 10)

    first = compounds[0]
    self.assertCountEqual(first.keys(), comp_keys - {'deltagerr', 'deltag'})
    # Use the TestCase assertion helpers throughout: bare ``assert``
    # statements are stripped under ``python -O`` and report less detail.
    self.assertEqual(len(first['fingerprints']), 2)
    self.assertIn('mol', first)
Exemple #2
0
    def compound_set_from_file(self, ctx, params):
        """
        CompoundSetFromFile
        string staging_file_path
        :param params: instance of type "compoundset_upload_params" ->
           structure: parameter "workspace_name" of String, parameter
           "staging_file_path" of String, parameter "compound_set_name" of
           String
        :returns: instance of type "compoundset_upload_results" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "compoundset_ref" of type "obj_ref"
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN compound_set_from_file
        required_keys = ['workspace_name',
                         'staging_file_path',
                         'compound_set_name']
        self._check_required_param(params, required_keys)

        # Fetch the staged file; the path of the local copy is read from the
        # 'copy_file_path' entry of the response.
        download_info = self.dfu.download_staging_file(
            {'staging_file_subdir_path': params['staging_file_path']}
        )
        scratch_file_path = download_info.get('copy_file_path')
        # I probably should be uploading the raw files to shock

        ext = os.path.splitext(scratch_file_path)[1]
        file_name = os.path.basename(scratch_file_path)

        # Only the two supported extensions get past this guard, so the
        # reader choice below is a plain either/or.
        if ext not in ('.sdf', '.tsv'):
            raise ValueError('Invalid input file type. Expects .tsv or .sdf')
        reader = parse.read_sdf if ext == '.sdf' else parse.read_tsv
        compounds = reader(scratch_file_path)

        # The set name doubles as both the object id and its display name.
        set_name = params['compound_set_name']
        compoundset = {
            'id': set_name,
            'name': set_name,
            'description': 'Compound Set produced from %s' % file_name,
            'compounds': compounds,
        }

        output = self._save_to_ws_and_report(
            ctx,
            'compound_set_from_file',
            params['workspace_name'],
            params['staging_file_path'],
            compoundset,
        )
        #END compound_set_from_file

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method compound_set_from_file return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Exemple #3
0
    def compound_set_from_file(self, ctx, params):
        """
        CompoundSetFromFile
        string staging_file_path
        :param params: instance of type "compoundset_upload_params" ->
           structure: parameter "workspace_id" of String, parameter
           "staging_file_path" of String, parameter "compound_set_name" of
           String, parameter "mol2_staging_file_path" of String
        :returns: instance of type "compoundset_upload_results" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "compoundset_ref" of type "obj_ref"
        """
        # ctx is the context object
        # return variables are: output
        #BEGIN compound_set_from_file
        # Flow: validate params, fetch the staged compound file (and the
        # optional mol2 archive), parse by extension, then save and report.
        # Raises ValueError when the mol2 archive cannot be unpacked or the
        # compound file is neither .sdf nor .tsv.
        self._check_param(
            params, ['workspace_id', 'staging_file_path', 'compound_set_name'],
            opt_param=['mol2_staging_file_path'])
        scratch_file_path = self.dfu.download_staging_file({
            'staging_file_subdir_path':
            params['staging_file_path']
        }).get('copy_file_path')
        # I probably should be uploading the raw files to shock

        mol2_staging_file_path = params.get('mol2_staging_file_path')

        # Stays None when no mol2 archive was supplied; the parsers receive
        # it unchanged in that case.
        mol2_file_dir = None
        if mol2_staging_file_path:
            mol2_scratch_file_path = self.dfu.download_staging_file({
                'staging_file_subdir_path':
                mol2_staging_file_path
            }).get('copy_file_path')

            logging.info("start unpacking mol2 file")
            try:
                mol2_file_path_out = self.dfu.unpack_file(
                    {'file_path': mol2_scratch_file_path})['file_path']
            except Exception as err:
                # Report the staged archive name: the unpacked path is never
                # bound when unpack_file itself fails, so formatting it here
                # would raise UnboundLocalError and hide the real problem.
                raise ValueError('Cannot unpack mol2 file: {}'.format(
                    os.path.basename(mol2_staging_file_path))) from err
            # The mol2 files are looked up in the directory that holds the
            # unpacked result.
            mol2_file_dir = os.path.dirname(mol2_file_path_out)

        ext = os.path.splitext(scratch_file_path)[1]
        file_name = os.path.basename(scratch_file_path)
        if ext == '.sdf':
            compounds = parse.read_sdf(scratch_file_path,
                                       mol2_file_dir=mol2_file_dir,
                                       callback_url=self.callback_url)
        elif ext == '.tsv':
            compounds = parse.read_tsv(scratch_file_path,
                                       mol2_file_dir=mol2_file_dir,
                                       callback_url=self.callback_url)
        else:
            raise ValueError('Invalid input file type. Expects .tsv or .sdf')

        compoundset = {
            'id': params['compound_set_name'],
            'name': params['compound_set_name'],
            'description': 'Compound Set produced from %s' % file_name,
            'compounds': compounds,
        }

        output = self._save_to_ws_and_report(params['workspace_id'],
                                             params['staging_file_path'],
                                             compoundset)
        #END compound_set_from_file

        # At some point might do deeper type checking...
        if not isinstance(output, dict):
            raise ValueError('Method compound_set_from_file return value ' +
                             'output is not type dict as required.')
        # return the results
        return [output]
Exemple #4
0
def test_read_sdf():
    """Parse the SDF fixture and check count, key set and fingerprints.

    The first compound's keys must match ``comp_keys`` with ``deltag`` and
    ``deltagerr`` removed, and it must carry exactly two fingerprints.
    """
    parsed = compound_parsing.read_sdf('test_compounds.sdf')
    assert len(parsed) == 10

    first = parsed[0]
    # An empty symmetric difference means no key is missing and none is extra.
    key_mismatch = set(first.keys()) ^ (comp_keys - {'deltagerr', 'deltag'})
    assert not key_mismatch
    assert len(first['fingerprints']) == 2