def handle(self, *args, **options):
    """Parse a tabular metadata file described by CLI options and register
    it as a new dataset owned by the given user.
    """
    # Every one of these must be supplied explicitly on the command line.
    for name in ('username', 'title', 'file_name',
                 'source_column_index', 'data_file_column'):
        if not options[name]:
            raise CommandError('%s was not provided.' % name)

    parser = SingleFileColumnParser()
    parser.file_permanent = options['data_file_permanent']
    parser.file_column_index = int(options['data_file_column'])
    # Source column indices arrive as a comma-separated string, e.g. "0,2".
    parser.source_column_index = [
        int(index.strip())
        for index in options['source_column_index'].split(",")
    ]
    parser.column_index_separator = "/"
    parser.file_base_path = options['base_path']

    # Optional column indices: configure the parser only when provided.
    if options['auxiliary_file_column'] is not None:
        parser.auxiliary_file_column_index = int(
            options['auxiliary_file_column'])
    if options['species_column'] is not None:
        parser.species_column_index = int(options['species_column'])
    if options['genome_build_column'] is not None:
        parser.genome_build_column_index = int(
            options['genome_build_column'])
    if options['annotation_column'] is not None:
        parser.annotation_column_index = int(options['annotation_column'])

    investigation = parser.run(options['file_name'])
    investigation.title = options['title']
    investigation.save()

    create_dataset(
        investigation.uuid, options['username'],
        dataset_title=options['title'], slug=options['slug'],
        public=options['is_public'])
def handle(self, *args, **options):
    """Import a metadata file laid out in the Refinery default tabular
    format, where each column's meaning is fixed by position.
    """
    # The default format pins the meaning of each column up front.
    options.update({
        'source_column_index': "0",  # source = filename
        'data_file_column': "1",
        'auxiliary_file_column': "2",
        'species_column': "3",
        'genome_build_column': "4",
        'annotation_column': "5",
    })
    options['data_file_permanent'] = True

    # Only these remain user-supplied; fail on the first missing one.
    for name in ('username', 'title', 'file_name'):
        if not options[name]:
            raise CommandError('%s was not provided.' % name)

    parser = SingleFileColumnParser()
    parser.source_column_index = [
        int(token.strip())
        for token in options['source_column_index'].split(",")
    ]
    parser.column_index_separator = "/"
    parser.file_base_path = options['base_path']
    # Column positions fixed by the default format above.
    parser.file_column_index = int(options['data_file_column'])
    parser.auxiliary_file_column_index = int(
        options['auxiliary_file_column'])
    parser.species_column_index = int(options['species_column'])
    parser.genome_build_column_index = int(options['genome_build_column'])
    parser.annotation_column_index = int(options['annotation_column'])
    parser.file_permanent = options['data_file_permanent']

    investigation = parser.run(options['file_name'])
    investigation.title = options['title']
    investigation.save()

    create_dataset(investigation.uuid,
                   options['username'],
                   dataset_title=options['title'],
                   slug=options['slug'],
                   public=options['is_public'])
def handle(self, *args, **options):
    """Import a metadata file in the Refinery default tabular format."""
    # Pre-defined column layout for the default format.
    options["source_column_index"] = "0"  # source = filename
    options["data_file_column"] = "1"
    options["auxiliary_file_column"] = "2"
    options["species_column"] = "3"
    options["genome_build_column"] = "4"
    options["annotation_column"] = "5"
    options["data_file_permanent"] = True

    # Report the first user-supplied option that is missing.
    missing = [arg for arg in ("username", "title", "file_name")
               if not options[arg]]
    if missing:
        raise CommandError("%s was not provided." % missing[0])

    parser = SingleFileColumnParser()
    parser.source_column_index = [
        int(piece.strip())
        for piece in options["source_column_index"].split(",")
    ]
    parser.column_index_separator = "/"
    parser.file_base_path = options["base_path"]
    # These indices are fixed by the default layout assigned above.
    for attr, key in (
        ("file_column_index", "data_file_column"),
        ("auxiliary_file_column_index", "auxiliary_file_column"),
        ("species_column_index", "species_column"),
        ("genome_build_column_index", "genome_build_column"),
        ("annotation_column_index", "annotation_column"),
    ):
        setattr(parser, attr, int(options[key]))
    parser.file_permanent = options["data_file_permanent"]

    investigation = parser.run(options["file_name"])
    investigation.title = options["title"]
    investigation.save()

    create_dataset(
        investigation.uuid,
        options["username"],
        dataset_title=options["title"],
        slug=options["slug"],
        public=options["is_public"],
    )
def process_metadata_table(username, title, metadata_file, source_columns,
                           data_file_column, auxiliary_file_column=None,
                           base_path="", data_file_permanent=False,
                           species_column=None, genome_build_column=None,
                           annotation_column=None, sample_column=None,
                           assay_column=None, slug=None, is_public=False):
    """Create a dataset given a metadata file object and its description

    :param username: username
    :type username: str
    :param title: dataset name
    :type title: str
    :param metadata_file: metadata file in tab-delimited format
    :type metadata_file: file
    :param source_columns: a list of source column indices
    :type source_columns: list of ints
    :param data_file_column: data file column index
    :type data_file_column: int
    :param auxiliary_file_column: auxiliary file column index
    :type auxiliary_file_column: int
    :param base_path: path to append to data file
    :type base_path: str
    :param data_file_permanent: should data files be imported
    :type data_file_permanent: bool
    :param species_column: species column index
    :type species_column: int
    :param genome_build_column: genome build column index
    :type genome_build_column: int
    :param annotation_column: annotation column index
    :type annotation_column: int
    :param sample_column: sample name column index
    :type sample_column: int
    :param assay_column: assay name column index
    :type assay_column: int
    :param slug: dataset name shortcut
    :type slug: str
    :param is_public: is dataset available to public
    :type is_public: bool
    :returns: UUID of the new dataset
    :raises ValueError: if a required column index is not an integer
    """
    # Required indices: reject non-integer values outright.
    try:
        source_columns = [abs(int(x)) for x in source_columns]
    except ValueError as exc:
        logger.error(exc)
        raise ValueError("source column indices must be integers")
    try:
        data_file_column = int(data_file_column)
    except ValueError as exc:
        logger.error(exc)
        raise ValueError("data file column index must be an integer")
    # Optional indices: silently fall back to None when absent or malformed.
    try:
        auxiliary_file_column = int(auxiliary_file_column)
    except (TypeError, ValueError):
        auxiliary_file_column = None
    try:
        base_path = str(base_path)
    except ValueError:
        base_path = ""
    try:
        species_column = int(species_column)
    except (TypeError, ValueError):
        species_column = None
    try:
        genome_build_column = int(genome_build_column)
    except (TypeError, ValueError):
        genome_build_column = None
    try:
        annotation_column = int(annotation_column)
    except (TypeError, ValueError):
        annotation_column = None
    try:
        sample_column = int(sample_column)
    except (TypeError, ValueError):
        sample_column = None
    try:
        assay_column = int(assay_column)
    except (TypeError, ValueError):
        assay_column = None
    # BUGFIX: str(None) would yield the literal "None" — keep None as-is so
    # an absent slug is not stored as the string "None".
    if slug is not None:
        slug = str(slug)
    data_file_permanent = bool(data_file_permanent)
    is_public = bool(is_public)

    file_source_translator = generate_file_source_translator(
        username=username, base_path=base_path)
    parser = SingleFileColumnParser(
        metadata_file=metadata_file,
        file_source_translator=file_source_translator,
        source_column_index=source_columns,
        data_file_column_index=data_file_column,
        auxiliary_file_column_index=auxiliary_file_column,
        file_base_path=base_path,
        data_file_permanent=data_file_permanent,
        species_column_index=species_column,
        genome_build_column_index=genome_build_column,
        annotation_column_index=annotation_column,
        sample_column_index=sample_column,
        assay_column_index=assay_column,
        column_index_separator="/")
    investigation = parser.run()
    investigation.title = title
    investigation.save()
    return create_dataset(
        investigation_uuid=investigation.uuid,
        username=username,
        dataset_name=title,
        slug=slug,
        public=is_public)
def process_metadata_table(username, title, metadata_file, source_columns,
                           data_file_column, auxiliary_file_column=None,
                           base_path="", data_file_permanent=False,
                           species_column=None, genome_build_column=None,
                           annotation_column=None, sample_column=None,
                           assay_column=None, slug=None, is_public=False,
                           delimiter="comma", custom_delimiter_string=","):
    """Create a dataset given a metadata file object and its description

    :param username: username
    :type username: str
    :param title: dataset name
    :type title: str
    :param metadata_file: metadata file in tab-delimited format
    :type metadata_file: file
    :param source_columns: a list of source column indices
    :type source_columns: list of ints
    :param data_file_column: data file column index
    :type data_file_column: int
    :param auxiliary_file_column: auxiliary file column index
    :type auxiliary_file_column: int
    :param base_path: path to append to data file
    :type base_path: str
    :param data_file_permanent: should data files be imported
    :type data_file_permanent: bool
    :param species_column: species column index
    :type species_column: int
    :param genome_build_column: genome build column index
    :type genome_build_column: int
    :param annotation_column: annotation column index
    :type annotation_column: int
    :param slug: dataset name shortcut
    :type slug: str
    :param is_public: is dataset available to public
    :type is_public: bool
    :param delimiter: field delimiter name passed through to the parser
    :type delimiter: str
    :param custom_delimiter_string: literal delimiter used when custom
    :type custom_delimiter_string: str
    :returns: UUID of the new dataset
    """
    def _optional_int(value):
        # Coerce to int; absent or malformed values become None.
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    # Required indices must be integers.
    try:
        source_columns = [abs(int(col)) for col in source_columns]
    except ValueError as exc:
        logger.error(exc)
        raise ValueError("source column indices must be integers")
    try:
        data_file_column = int(data_file_column)
    except ValueError as exc:
        logger.error(exc)
        raise ValueError("data file column index must be an integer")

    auxiliary_file_column = _optional_int(auxiliary_file_column)
    species_column = _optional_int(species_column)
    genome_build_column = _optional_int(genome_build_column)
    annotation_column = _optional_int(annotation_column)
    sample_column = _optional_int(sample_column)
    assay_column = _optional_int(assay_column)

    try:
        base_path = str(base_path)
    except ValueError:
        base_path = ""
    try:
        # Leave a falsy slug (None, "") untouched rather than stringifying.
        if slug:
            slug = str(slug)
    except ValueError:
        slug = None
    try:
        delimiter = str(delimiter)
    except ValueError:
        delimiter = "comma"
    try:
        custom_delimiter_string = str(custom_delimiter_string)
    except ValueError:
        custom_delimiter_string = ","

    data_file_permanent = bool(data_file_permanent)
    is_public = bool(is_public)

    file_source_translator = generate_file_source_translator(
        username=username, base_path=base_path)
    parser = SingleFileColumnParser(
        metadata_file=metadata_file,
        file_source_translator=file_source_translator,
        source_column_index=source_columns,
        data_file_column_index=data_file_column,
        auxiliary_file_column_index=auxiliary_file_column,
        file_base_path=base_path,
        data_file_permanent=data_file_permanent,
        species_column_index=species_column,
        genome_build_column_index=genome_build_column,
        annotation_column_index=annotation_column,
        sample_column_index=sample_column,
        assay_column_index=assay_column,
        column_index_separator="/",
        delimiter=delimiter,
        custom_delimiter_string=custom_delimiter_string)
    investigation = parser.run()
    investigation.title = title
    investigation.save()
    return create_dataset(investigation_uuid=investigation.uuid,
                          username=username,
                          dataset_name=title,
                          slug=slug,
                          public=is_public)