Ejemplo n.º 1
0
    def import_spreadsheet(self, xls_filename):
        """Dry-run import a spreadsheet through ``XlsImporter``.

        Args:
            xls_filename: Path of the spreadsheet to import. When falsy, the
                path stored in ``self._xls_filename`` is used instead.

        Returns:
            Whatever ``XlsImporter.dry_run_import_file`` returns for the file.
        """
        self._logger.debug("importing spreadsheet")

        # Prefer the caller-supplied path; fall back to the instance attribute
        # only when no path is given, so the parameter is no longer ignored.
        file_path = xls_filename or self._xls_filename

        ingest_api = IngestApi(url=Config['INGEST_API'])
        importer = XlsImporter(ingest_api)

        return importer.dry_run_import_file(file_path=file_path)
Ejemplo n.º 2
0
    def test_spreadsheet_import(self):
        """Import the reference spreadsheet and compare entity counts.

        Downloads the spreadsheet, imports it into a freshly created
        submission, then checks that the number of files, biomaterials,
        protocols and processes reported by the ingest API for that
        submission matches the number of entities of each type held by the
        object returned from ``import_file``.
        """
        self.metadata_spreadsheet_path = os.path.join(self.test_data_path,
                                                      SPREADSHEET_FILE)
        download_file(SPREADSHEET_LOCATION, self.metadata_spreadsheet_path)
        importer = XlsImporter(self.ingest_api)
        submission_resource = self.ingest_api.create_submission()

        # Keep only the part of the self link that precedes the first "{".
        self_href = submission_resource["_links"]["self"]["href"]
        submission_url = self_href.split("{", 1)[0]
        submission, _ = importer.import_file(self.metadata_spreadsheet_path,
                                             submission_url, False)

        # Group the imported entities by their ``type`` attribute.
        entities_by_type = {}
        for entity in submission.get_entities():
            entities_by_type.setdefault(entity.type, []).append(entity)

        files = list(self.ingest_api.get_entities(submission_url, 'files'))
        biomaterials = list(
            self.ingest_api.get_entities(submission_url, 'biomaterials'))
        protocols = list(
            self.ingest_api.get_entities(submission_url, 'protocols'))
        processes = list(
            self.ingest_api.get_entities(submission_url, 'processes'))

        # ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
        # which no longer exists in Python 3.12+.
        self.assertEqual(len(files), len(entities_by_type['file']))
        self.assertEqual(len(biomaterials),
                         len(entities_by_type['biomaterial']))
        self.assertEqual(len(protocols), len(entities_by_type['protocol']))
        self.assertEqual(len(processes), len(entities_by_type['process']))
Ejemplo n.º 3
0
def upload_spreadsheet():
    """Save the uploaded spreadsheet and submit its data to a new submission.

    Returns:
        The result of ``create_upload_success_response`` for the new
        submission URL, or the result of ``create_upload_failure_response``
        when a ``SpreadsheetUploadError`` (its own code/message/details) or
        any other exception (reported as a 500) is raised along the way.
    """
    try:
        logger.info("Uploading spreadsheet")
        auth_token = _check_token()
        spreadsheet_path = _save_spreadsheet()

        api = IngestApi()
        api.set_token(auth_token)
        xls_importer = XlsImporter(api)

        # The project uuid, when present, is nested as project['uuid']['uuid'].
        existing_project = _check_for_project(api)
        uuid_wrapper = (existing_project.get('uuid')
                        if existing_project else None)
        project_uuid = uuid_wrapper.get('uuid') if uuid_wrapper else None

        submission_url = api.createSubmission(auth_token)
        _submit_spreadsheet_data(xls_importer, spreadsheet_path,
                                 submission_url, project_uuid)
        return create_upload_success_response(submission_url)
    except SpreadsheetUploadError as upload_error:
        return create_upload_failure_response(upload_error.http_code,
                                              upload_error.message,
                                              upload_error.details)
    except Exception as unexpected:
        # Last-resort boundary: log the traceback and answer with a 500.
        logger.error(traceback.format_exc())
        failure_message = (
            "We experienced a problem while uploading your spreadsheet")
        return create_upload_failure_response(500, failure_message,
                                              str(unexpected))
Ejemplo n.º 4
0
    def test_import_spreadsheet(self):
        """A dry-run import of the spleen spreadsheet returns a truthy value."""
        workbook_path = BASE_PATH + '/metadata_spleen_new_protocols.xlsx'
        importer = XlsImporter(MagicMock())

        result = importer.import_file(file_path=workbook_path,
                                      submission_url=None,
                                      dry_run=True)

        self.assertTrue(result)
Ejemplo n.º 5
0
def main():
    """Run the interactive menu of the HCA-to-DSP script.

    Prompts on stdin until the user enters 1, 2 or 3. Option 2 repeatedly
    shows the CLI options and calls the chosen function until none is
    chosen. Option 3 dry-run imports a spreadsheet, converts the resulting
    entities to JSON (dropping the ``'files'`` entry) and writes them to a
    folder. Option 1 has no dedicated branch here and falls straight through
    to the goodbye message.
    """
    dsp = DspCLI()
    welcome = (
        "Welcome to the HCA to DSP easy use script! Please, select the option that better suits your needs:\n"
    )
    menu = (
        "1 - Submission for dummies: Guided submisssion through the DSP, with indications and questions along the way\n"
        "2 - I want to do my own thing: Access to all the functions the DspCLI object provides\n"
        "3 - I just want to convert a spreadsheet into submittable objects and then exit."
    )
    print(welcome)
    print(menu)

    # Keep asking until the input parses as an integer in {1, 2, 3}.
    retry_message = "Please select a valid option: 1, 2 or 3\n"
    option = None
    while option is None:
        try:
            choice = int(input())
        except ValueError:
            print(retry_message)
            continue
        if choice in (1, 2, 3):
            option = choice
        else:
            print(retry_message)

    if option == 2:
        # Loop until show_cli_options returns something falsy.
        chosen = show_cli_options(dsp)
        while chosen:
            call_function(chosen, dsp)
            chosen = show_cli_options(dsp)
    elif option == 3:
        # Import the spreadsheet with the HCA ingest importer.
        spreadsheet_path = input(
            "Please provide with the path to the HCA spreadsheet file: ")
        print("Importing; Might take some time to process...\n\n")
        ingest_api = IngestApi(
            url="https://api.ingest.data.humancellatlas.org/")
        imported = XlsImporter(ingest_api).dry_run_import_file(
            spreadsheet_path)

        # Build the JSON objects from the entity map; 'files' is not written.
        json_list = get_json_from_map(imported.get_entities())
        del json_list['files']

        output_path = input(
            "Please provide with the folder path for the submittable outputs: "
        )
        write_json_to_submit(json_list, output_path)
        print(f"Saved submittable JSONs to folder {output_path}")

    print("Goodbye! :)")
Ejemplo n.º 6
0
 def setUp(self):
     """Create the mocked API, the importer under test and shared fixtures."""
     api_double = MagicMock(spec=IngestApi)
     self.mock_ingest_api = api_double
     self.importer = XlsImporter(api_double)

     # Template manager stub whose get_schema_url always returns ''.
     template_mgr = Mock()
     template_mgr.get_schema_url = Mock(return_value='')
     self.mock_template_mgr = template_mgr

     # Spreadsheet JSON whose only project entry is a linking reference.
     project_reference = {'is_linking_reference': True}
     self.spreadsheet_json_with_project_reference = {
         'project': {'project-uuid': project_reference},
     }
Ejemplo n.º 7
0
 def __init__(self, file):
     """Dry-run import ``file`` and initialise empty graph bookkeeping.

     The ingest API URL is read from the ``INGEST_API`` environment
     variable, defaulting to the public humancellatlas.org endpoint.

     Args:
         file: Spreadsheet handed to ``XlsImporter.dry_run_import_file``.
     """
     default_api_url = 'https://api.ingest.data.humancellatlas.org'
     api_client = IngestApi(
         url=os.environ.get('INGEST_API', default_api_url))

     # Import result and its per-type entity lookup.
     self.importer = XlsImporter(api_client)
     self.entity_map = self.importer.dry_run_import_file(file)
     self.node_by_type = self.entity_map.entities_dict_by_type

     # Containers that start out empty.
     self.links = {}
     self.files = []
     self.donors = []
     self.path_dict = {}
     self.process_prot_link = {}
     self.high_level = {}

     # Entity type names, split into two fixed groups.
     self.known_position = [
         "donor_organism",
         "library_preparation_protocol",
         "sequencing_protocol",
     ]
     self.unknown_position = [
         "specimen_from_organism",
         "cell_line",
         "organoid",
         "collection_protocol",
         "enrichment_protocol",
         "dissociation_protocol",
         "cell_suspension",
     ]
     self.high_level_paths = []
Ejemplo n.º 8
0
class GraphValidator:
    """Build and inspect the link graph of a dry-run imported spreadsheet.

    The spreadsheet is imported with ``XlsImporter.dry_run_import_file``; the
    resulting ``entities_dict_by_type`` is used as a mapping of entity type to
    ``{node id: entity}``, where each entity exposes ``content`` (a dict with
    a ``describedBy`` URL) and ``direct_links`` (a list of dicts with
    ``entity`` and ``id`` keys).
    """

    def __init__(self, file):
        """Import ``file`` and initialise the empty bookkeeping containers.

        The ingest API URL comes from the ``INGEST_API`` environment variable
        and defaults to ``https://api.ingest.data.humancellatlas.org``.

        Args:
            file: Spreadsheet handed to ``XlsImporter.dry_run_import_file``.
        """
        ingest_url = os.environ.get(
            'INGEST_API', 'https://api.ingest.data.humancellatlas.org')
        ingest_api = IngestApi(url=ingest_url)
        self.importer = XlsImporter(ingest_api)
        self.entity_map = self.importer.dry_run_import_file(file)
        self.node_by_type = self.entity_map.entities_dict_by_type
        self.links = {}              # node id -> ids of linked nodes
        self.files = []              # ids of every 'file' node
        self.donors = []             # ids of donor_organism biomaterials
        self.path_dict = {}          # rendered path -> file ids reaching it
        self.process_prot_link = {}  # process id -> linked protocol ids
        self.high_level = {}         # specific type -> node ids
        self.known_position = [
            "donor_organism", "library_preparation_protocol",
            "sequencing_protocol"
        ]
        self.unknown_position = [
            "specimen_from_organism", "cell_line", "organoid",
            "collection_protocol", "enrichment_protocol",
            "dissociation_protocol", "cell_suspension"
        ]
        self.high_level_paths = []

    @staticmethod
    def _specific_type(entity):
        """Return the last ``/``-separated segment of ``describedBy``."""
        return entity.content.get('describedBy').rsplit('/', 1)[-1]

    def get_all_links(self):
        """Collect files, donors, process links and process->protocol links.

        Walks the 'file', 'biomaterial' and 'process' nodes in that order.
        Every visited node ends up as a key of ``self.links``; nodes without
        any direct link to a process get an empty list. An entity type that
        is absent from ``self.node_by_type`` is treated as having no nodes.
        """
        for entity_type in ('file', 'biomaterial', 'process'):
            # A spreadsheet may lack one of the types; do not fail on it.
            nodes = self.node_by_type.get(entity_type) or {}
            for node_id, entity in nodes.items():
                if entity_type == 'file':
                    self.files.append(node_id)
                elif (entity_type == 'biomaterial'
                      and self._specific_type(entity) == 'donor_organism'):
                    self.donors.append(node_id)

                has_process_link = False
                for link in entity.direct_links or []:
                    linked_type = link.get('entity')
                    linked_id = link.get('id')
                    if linked_type == 'process':
                        has_process_link = True
                        self.links.setdefault(node_id, []).append(linked_id)
                    elif (entity_type == 'process'
                          and linked_type == 'protocol'):
                        protocols = self.process_prot_link.setdefault(
                            node_id, [])
                        if linked_id not in protocols:
                            protocols.append(linked_id)

                if not has_process_link:
                    self.links[node_id] = []

    def add_process_links(self):
        """Make links symmetric for targets that are themselves graph keys."""
        for key, val in self.links.items():
            for link in val:
                if link in self.links and key not in self.links[link]:
                    self.links[link].append(key)

    def identify_float(self):
        """Print every node whose link list is empty."""
        for key, linked in self.links.items():
            if not linked:
                print('%s is a floating entity' % key)

    def find_path(self, s, e, link_graph, path=None):
        """Depth-first search for a path from ``s`` to ``e``.

        Args:
            s: Start node id.
            e: End node id.
            link_graph: Mapping of node id to a list of linked node ids.
            path: Nodes already visited on the way to ``s`` (internal).

        Returns:
            The first path found as a list of node ids from the original
            start to ``e``, or ``None`` when no path exists.
        """
        path = (path or []) + [s]
        if s == e:
            return path
        if s not in link_graph:
            return None
        for node in link_graph[s]:
            if node in path:
                continue
            extended_path = self.find_path(node, e, link_graph, path=path)
            if extended_path:
                return extended_path
        return None

    def find_all_paths(self):
        """Record, for every file/donor pair, the path that connects them.

        The path (without the file itself) is reversed so it starts at the
        donor, each process step is suffixed with ``/``-joined protocol ids,
        and the steps are joined with ``:`` to form the ``path_dict`` key.
        """
        for f in self.files:
            for d in self.donors:
                entity_full_path = self.find_path(f, d, self.links)
                if not entity_full_path:
                    continue
                filename = entity_full_path[0]
                steps = [
                    step + '/' + '/'.join(self.process_prot_link[step])
                    if step in self.process_prot_link else step
                    for step in entity_full_path[1:]
                ]
                steps.reverse()
                self.path_dict.setdefault(':'.join(steps),
                                          []).append(filename)

    def clean_output(self):
        """Print each recorded path with ' -> ' separators and its files."""
        for key, val in self.path_dict.items():
            print('%s %s' % (key.replace(':', ' -> '), val))

    def gather_entity_info(self):
        """Group node ids by specific type into ``self.high_level``.

        Each node id is recorded at most once per specific type, so calling
        this method repeatedly does not inflate the counts.
        """
        # Sets mirror the lists so the membership test stays O(1).
        seen = {kind: set(ids) for kind, ids in self.high_level.items()}
        for node_type in self.node_by_type:
            node_dict = self.node_by_type.get(node_type)
            for node_id, entity in node_dict.items():
                specific_type = self._specific_type(entity)
                known = seen.setdefault(specific_type, set())
                if node_id in known:
                    continue
                known.add(node_id)
                self.high_level.setdefault(specific_type,
                                           []).append(node_id)

    def summary(self):
        """Print the number of recorded paths and of nodes per type."""
        print('Number of bundles: %s' % len(self.path_dict))
        for key, val in self.high_level.items():
            print('Number of %s: %s' % (key, len(val)))