Esempio n. 1
0
# Characters accepted in a nucleotide sequence.
# Note added X and x due to kb|g.1886.fasta
_VALID_NUCLEOTIDE_CHARS = "-AaCcGgTtUuWwSsMmKkRrYyBbDdHhVvNnXx"
# An invalid character from this set triggers the "may have amino acids"
# error instead of the generic invalid-character error.
_AMINO_ACID_SPECIFIC_CHARS = "PpLlIiFfQqEe"
_WHITESPACE_RE = re.compile(r'\s+')
# Finds the first character that is not in _VALID_NUCLEOTIDE_CHARS.
_INVALID_NUCLEOTIDE_RE = re.compile(
    '[^' + re.escape(_VALID_NUCLEOTIDE_CHARS) + ']')
_NO_SEQUENCE_MESSAGE = "There is no sequence related to FASTA record : {0}"


def _read_genome_ids(genome_list_file):
    """Return the non-blank lines of ``genome_list_file`` without line endings."""
    if os.path.isfile(genome_list_file):
        print("Found Genome_list_File")
    with open(genome_list_file, 'r') as genomes_f:
        # rstrip instead of re.split(r'\n*', ...): that pattern can match the
        # empty string, which makes element 0 empty on Python 3.7+.
        lines = [genome_line.rstrip('\r\n') for genome_line in genomes_f]
    # A blank line would otherwise become the genome id "" ("<dir>/.fasta").
    return [genome_id for genome_id in lines if genome_id.strip()]


def _build_contig(fasta_header, sequence_lines, start_position,
                  stop_position, logger):
    """Validate one FASTA record and return ``(contig_dict, gc_count)``.

    Raises:
        Exception: if the record has no sequence, or the sequence contains a
            character outside ``_VALID_NUCLEOTIDE_CHARS``.
    """
    # build up sequence and remove all white space
    sequence = _WHITESPACE_RE.sub('', ''.join(sequence_lines))
    if not sequence:
        message = _NO_SEQUENCE_MESSAGE.format(fasta_header)
        logger.error(message)
        raise Exception(message)

    # Only the first offending character decides which error is raised.
    invalid = _INVALID_NUCLEOTIDE_RE.search(sequence)
    if invalid is not None:
        character = invalid.group(0)
        if character in _AMINO_ACID_SPECIFIC_CHARS:
            raise Exception(
                "This fasta file may have amino acids in it instead of the required nucleotides."
            )
        raise Exception(
            "This FASTA file has non nucleic acid characters : {0}".format(
                character))

    length = len(sequence)
    gc_count = sum(sequence.count(base) for base in "GgCc")
    fasta_key = fasta_header.strip()
    contig = {
        "gc_content": float(gc_count) / float(length),
        "contig_id": fasta_key,
        "length": length,
        "name": fasta_key,
        "description": "Note MD5 is generated from uppercasing the sequence",
        # The sequence is validated ASCII at this point, so encoding cannot
        # fail and the digest is the same whether the text is str or unicode.
        "md5": hashlib.md5(sequence.upper().encode('utf-8')).hexdigest(),
        "is_circular": "unknown",
        "start_position": start_position,
        "num_bytes": stop_position - start_position,
    }
    return contig, gc_count


def _parse_fasta_contigs(input_file_handle, logger):
    """Read every record from an open FASTA handle.

    Returns:
        ``(contigs, contig_md5s, total_length, gc_length)`` where ``contigs``
        maps contig id to contig dict, ``contig_md5s`` lists the per-contig
        MD5s in file order, and the last two are summed over all records.

    Raises:
        Exception: if the file has no header line, has non-blank content
            before the first header, or contains an invalid record.
    """
    records = []  # (contig_dict, gc_count) in file order
    fasta_header = None  # None until the first '>' line has been seen
    sequence_lines = []
    sequence_start = None  # offset of the current record's first sequence line

    # readline() is used instead of iterating the handle because tell() is
    # consulted between lines.
    # NOTE(review): the offsets assume tell() reports the position just past
    # the line readline() returned, and that one character is one byte (true
    # for ISO-8859-1 text) - confirm against TextFileDecoder.
    current_line = input_file_handle.readline()
    while current_line:
        if current_line[0] == ">":
            if fasta_header is not None:
                # Wrap up the previous record; it ends where this header starts.
                sequence_stop = input_file_handle.tell() - len(current_line)
                records.append(_build_contig(
                    fasta_header, sequence_lines, sequence_start,
                    sequence_stop, logger))
            # get set up for next fasta sequence
            sequence_lines = []
            sequence_start = None
            # Every '>' in the header line is removed, not only the leading one.
            fasta_header = current_line.replace('>', '')
        elif fasta_header is None:
            # Content ahead of the first header belongs to no record; it must
            # not be merged into the first contig.
            if current_line.strip():
                message = "Sequence data found before the first FASTA header"
                logger.error(message)
                raise Exception(message)
        else:
            if sequence_start is None:
                sequence_start = input_file_handle.tell() - len(current_line)
            sequence_lines.append(current_line)
        current_line = input_file_handle.readline()

    if fasta_header is None:
        logger.error("There are no contigs in this file")
        raise Exception("There are no contigs in this file")

    # wrap up last fasta sequence; it ends at end of file
    records.append(_build_contig(
        fasta_header, sequence_lines, sequence_start,
        input_file_handle.tell(), logger))

    contigs = dict()
    contig_md5s = []
    total_length = 0
    gc_length = 0
    for contig, gc_count in records:
        # NOTE(review): a repeated header overwrites the earlier contig here
        # while its md5 and length stay counted in the totals.
        contigs[contig["contig_id"]] = contig
        contig_md5s.append(contig["md5"])
        total_length = total_length + contig["length"]
        gc_length = gc_length + gc_count
    return contigs, contig_md5s, total_length, gc_length


def transform(
        shock_service_url=None,
        input_fasta_directory=None,
        wsname=None,
        wsurl=None,
        genome_list_file=None,
        level=logging.INFO,
        logger=None):
    """
    Uploads CondensedGenomeAssembly

    For every genome id listed in ``genome_list_file`` the file
    ``<input_fasta_directory>/<genome_id>.fasta`` is parsed, uploaded to
    SHOCK, and saved to the workspace as a
    ``KBaseGenomesCondensedPrototypeV2.Assembly`` object named
    ``<genome_id>_assembly``.

    Args:
        shock_service_url: A url for the KBase SHOCK service.
        input_fasta_directory: The directory where files will be read from.
        wsname: Name of the workspace the assemblies are saved to.
        wsurl: URL passed to the workspace client.
        genome_list_file: Text file with one genome id per line.
        level: Logging level, defaults to logging.INFO (currently unused).
        logger: Logger to report to; a stderr logger is created when None.

    Returns:
        None. The assemblies are saved to the workspace.

    Raises:
        Exception: if a FASTA file is missing or malformed. Errors raised
            while saving to the workspace are printed and re-raised.

    Authors:
        Jason Baumohl, Matt Henderson
    """

    if logger is None:
        logger = script_utils.stderrlogger(__file__)

    assembly_ws_client = doekbase.workspace.client.Workspace(wsurl)
    assembly_workspace_object = assembly_ws_client.get_workspace_info(
        {'workspace': wsname})
    workspace_name = assembly_workspace_object[1]

    genomes_list = _read_genome_ids(genome_list_file)

    logger.info("Starting conversion of FASTA to Assemblies")
    token = os.environ.get('KB_AUTH_TOKEN')

    for genome_id in genomes_list:

        logger.info("Building Object.")

        # The id is used verbatim: a '|' needs no escaping in a path handed
        # to os.path / open.
        input_file_name = "%s/%s.fasta" % (input_fasta_directory, genome_id)
        if not os.path.isfile(input_file_name):
            raise Exception("The input file name {0} is not a file!".format(
                input_file_name))

        input_file_handle = TextFileDecoder.open_textdecoder(
            input_file_name, 'ISO-8859-1')
        try:
            fasta_dict, contig_set_md5_list, total_length, gc_length = (
                _parse_fasta_contigs(input_file_handle, logger))
        finally:
            # Close the handle even when the file turns out to be invalid.
            input_file_handle.close()

        contig_set_dict = dict()
        contig_set_dict["md5"] = hashlib.md5(",".join(
            sorted(contig_set_md5_list)).encode('utf-8')).hexdigest()
        contig_set_dict["assembly_id"] = genome_id
        contig_set_dict["name"] = genome_id
        contig_set_dict["external_source"] = "KBase"
        contig_set_dict["external_source_id"] = os.path.basename(
            input_file_name)
        contig_set_dict["external_source_origination_date"] = str(
            os.stat(input_file_name).st_ctime)
        contig_set_dict["contigs"] = fasta_dict
        contig_set_dict["dna_size"] = total_length
        contig_set_dict["gc_content"] = float(gc_length) / float(total_length)
        contig_set_dict["num_contigs"] = len(fasta_dict)
        contig_set_dict["type"] = "Unknown"
        contig_set_dict["notes"] = "Unknown"

        shock_info = script_utils.upload_file_to_shock(logger,
                                                       shock_service_url,
                                                       input_file_name,
                                                       token=token)
        # For future development if the type is updated to the
        # handle_reference instead of a shock_reference
        contig_set_dict["fasta_handle_ref"] = shock_info["id"]

        assembly_provenance = [{
            "script": __file__,
            "script_ver": "0.1",
            "description":
            "Generated from fasta files generated from v5 of the CS."
        }]
        try:
            assembly_ws_client.save_objects({
                "workspace": workspace_name,
                "objects": [{
                    "type": "KBaseGenomesCondensedPrototypeV2.Assembly",
                    "data": contig_set_dict,
                    "name": "%s_assembly" % (genome_id),
                    "provenance": assembly_provenance
                }]
            })
        except doekbase.workspace.client.ServerError as err:
            # str(err): concatenating the exception object itself raises
            # TypeError and hides the real server error.
            print("SAVE FAILED ON genome " + str(genome_id) + " ERROR: " +
                  str(err))
            raise
        except Exception:
            print("SAVE FAILED ON genome " + str(genome_id) +
                  " GENERAL_EXCEPTION: " + str(sys.exc_info()[0]))
            raise

        logger.info("Conversion completed.")
Esempio n. 2
0
# Characters accepted in a nucleotide sequence.
# Note added X and x due to kb|g.1886.fasta
_VALID_NUCLEOTIDE_CHARS = "-AaCcGgTtUuWwSsMmKkRrYyBbDdHhVvNnXx"
# An invalid character from this set triggers the "may have amino acids"
# error instead of the generic invalid-character error.
_AMINO_ACID_SPECIFIC_CHARS = "PpLlIiFfQqEe"
_WHITESPACE_RE = re.compile(r'\s+')
# Finds the first character that is not in _VALID_NUCLEOTIDE_CHARS.
_INVALID_NUCLEOTIDE_RE = re.compile(
    '[^' + re.escape(_VALID_NUCLEOTIDE_CHARS) + ']')
_NO_SEQUENCE_MESSAGE = "There is no sequence related to FASTA record : {0}"


def _read_genome_ids(genome_list_file):
    """Return the non-blank lines of ``genome_list_file`` without line endings."""
    if os.path.isfile(genome_list_file):
        print("Found Genome_list_File")
    with open(genome_list_file, 'r') as genomes_f:
        # rstrip instead of re.split(r'\n*', ...): that pattern can match the
        # empty string, which makes element 0 empty on Python 3.7+.
        lines = [genome_line.rstrip('\r\n') for genome_line in genomes_f]
    # A blank line would otherwise become the genome id "" ("<dir>/.fasta").
    return [genome_id for genome_id in lines if genome_id.strip()]


def _build_contig(fasta_header, sequence_lines, start_position,
                  stop_position, logger):
    """Validate one FASTA record and return ``(contig_dict, gc_count)``.

    Raises:
        Exception: if the record has no sequence, or the sequence contains a
            character outside ``_VALID_NUCLEOTIDE_CHARS``.
    """
    # build up sequence and remove all white space
    sequence = _WHITESPACE_RE.sub('', ''.join(sequence_lines))
    if not sequence:
        message = _NO_SEQUENCE_MESSAGE.format(fasta_header)
        logger.error(message)
        raise Exception(message)

    # Only the first offending character decides which error is raised.
    invalid = _INVALID_NUCLEOTIDE_RE.search(sequence)
    if invalid is not None:
        character = invalid.group(0)
        if character in _AMINO_ACID_SPECIFIC_CHARS:
            raise Exception(
                "This fasta file may have amino acids in it instead of the required nucleotides."
            )
        raise Exception(
            "This FASTA file has non nucleic acid characters : {0}".format(
                character))

    length = len(sequence)
    gc_count = sum(sequence.count(base) for base in "GgCc")
    fasta_key = fasta_header.strip()
    contig = {
        "gc_content": float(gc_count) / float(length),
        "contig_id": fasta_key,
        "length": length,
        "name": fasta_key,
        "description": "Note MD5 is generated from uppercasing the sequence",
        # The sequence is validated ASCII at this point, so encoding cannot
        # fail and the digest is the same whether the text is str or unicode.
        "md5": hashlib.md5(sequence.upper().encode('utf-8')).hexdigest(),
        "is_circular": "unknown",
        "start_position": start_position,
        "num_bytes": stop_position - start_position,
    }
    return contig, gc_count


def _parse_fasta_contigs(input_file_handle, logger):
    """Read every record from an open FASTA handle.

    Returns:
        ``(contigs, contig_md5s, total_length, gc_length)`` where ``contigs``
        maps contig id to contig dict, ``contig_md5s`` lists the per-contig
        MD5s in file order, and the last two are summed over all records.

    Raises:
        Exception: if the file has no header line, has non-blank content
            before the first header, or contains an invalid record.
    """
    records = []  # (contig_dict, gc_count) in file order
    fasta_header = None  # None until the first '>' line has been seen
    sequence_lines = []
    sequence_start = None  # offset of the current record's first sequence line

    # readline() is used instead of iterating the handle because tell() is
    # consulted between lines.
    # NOTE(review): the offsets assume tell() reports the position just past
    # the line readline() returned, and that one character is one byte (true
    # for ISO-8859-1 text) - confirm against TextFileDecoder.
    current_line = input_file_handle.readline()
    while current_line:
        if current_line[0] == ">":
            if fasta_header is not None:
                # Wrap up the previous record; it ends where this header starts.
                sequence_stop = input_file_handle.tell() - len(current_line)
                records.append(_build_contig(
                    fasta_header, sequence_lines, sequence_start,
                    sequence_stop, logger))
            # get set up for next fasta sequence
            sequence_lines = []
            sequence_start = None
            # Every '>' in the header line is removed, not only the leading one.
            fasta_header = current_line.replace('>', '')
        elif fasta_header is None:
            # Content ahead of the first header belongs to no record; it must
            # not be merged into the first contig.
            if current_line.strip():
                message = "Sequence data found before the first FASTA header"
                logger.error(message)
                raise Exception(message)
        else:
            if sequence_start is None:
                sequence_start = input_file_handle.tell() - len(current_line)
            sequence_lines.append(current_line)
        current_line = input_file_handle.readline()

    if fasta_header is None:
        logger.error("There are no contigs in this file")
        raise Exception("There are no contigs in this file")

    # wrap up last fasta sequence; it ends at end of file
    records.append(_build_contig(
        fasta_header, sequence_lines, sequence_start,
        input_file_handle.tell(), logger))

    contigs = dict()
    contig_md5s = []
    total_length = 0
    gc_length = 0
    for contig, gc_count in records:
        # NOTE(review): a repeated header overwrites the earlier contig here
        # while its md5 and length stay counted in the totals.
        contigs[contig["contig_id"]] = contig
        contig_md5s.append(contig["md5"])
        total_length = total_length + contig["length"]
        gc_length = gc_length + gc_count
    return contigs, contig_md5s, total_length, gc_length


def transform(shock_service_url=None,
              input_fasta_directory=None,
              wsname=None,
              wsurl=None,
              genome_list_file=None,
              level=logging.INFO, logger=None):
    """
    Uploads CondensedGenomeAssembly

    For every genome id listed in ``genome_list_file`` the file
    ``<input_fasta_directory>/<genome_id>.fasta`` is parsed, uploaded to
    SHOCK, and saved to the workspace as a
    ``KBaseGenomesCondensedPrototypeV2.Assembly`` object named
    ``<genome_id>_assembly``.

    Args:
        shock_service_url: A url for the KBase SHOCK service.
        input_fasta_directory: The directory where files will be read from.
        wsname: Name of the workspace the assemblies are saved to.
        wsurl: URL passed to the workspace client.
        genome_list_file: Text file with one genome id per line.
        level: Logging level, defaults to logging.INFO (currently unused).
        logger: Logger to report to; a stderr logger is created when None.

    Returns:
        None. The assemblies are saved to the workspace.

    Raises:
        Exception: if a FASTA file is missing or malformed. Errors raised
            while saving to the workspace are printed and re-raised.

    Authors:
        Jason Baumohl, Matt Henderson
    """

    if logger is None:
        logger = script_utils.stderrlogger(__file__)

    assembly_ws_client = doekbase.workspace.client.Workspace(wsurl)
    assembly_workspace_object = assembly_ws_client.get_workspace_info(
        {'workspace': wsname})
    workspace_name = assembly_workspace_object[1]

    genomes_list = _read_genome_ids(genome_list_file)

    logger.info("Starting conversion of FASTA to Assemblies")
    token = os.environ.get('KB_AUTH_TOKEN')

    for genome_id in genomes_list:

        logger.info("Building Object.")

        # The id is used verbatim: a '|' needs no escaping in a path handed
        # to os.path / open.
        input_file_name = "%s/%s.fasta" % (input_fasta_directory, genome_id)
        if not os.path.isfile(input_file_name):
            raise Exception("The input file name {0} is not a file!".format(
                input_file_name))

        input_file_handle = TextFileDecoder.open_textdecoder(
            input_file_name, 'ISO-8859-1')
        try:
            fasta_dict, contig_set_md5_list, total_length, gc_length = (
                _parse_fasta_contigs(input_file_handle, logger))
        finally:
            # Close the handle even when the file turns out to be invalid.
            input_file_handle.close()

        contig_set_dict = dict()
        contig_set_dict["md5"] = hashlib.md5(",".join(
            sorted(contig_set_md5_list)).encode('utf-8')).hexdigest()
        contig_set_dict["assembly_id"] = genome_id
        contig_set_dict["name"] = genome_id
        contig_set_dict["external_source"] = "KBase"
        contig_set_dict["external_source_id"] = os.path.basename(
            input_file_name)
        contig_set_dict["external_source_origination_date"] = str(
            os.stat(input_file_name).st_ctime)
        contig_set_dict["contigs"] = fasta_dict
        contig_set_dict["dna_size"] = total_length
        contig_set_dict["gc_content"] = float(gc_length) / float(total_length)
        contig_set_dict["num_contigs"] = len(fasta_dict)
        contig_set_dict["type"] = "Unknown"
        contig_set_dict["notes"] = "Unknown"

        shock_info = script_utils.upload_file_to_shock(logger,
                                                       shock_service_url,
                                                       input_file_name,
                                                       token=token)
        # For future development if the type is updated to the
        # handle_reference instead of a shock_reference
        contig_set_dict["fasta_handle_ref"] = shock_info["id"]

        assembly_provenance = [{
            "script": __file__,
            "script_ver": "0.1",
            "description":
            "Generated from fasta files generated from v5 of the CS."
        }]
        try:
            assembly_ws_client.save_objects({
                "workspace": workspace_name,
                "objects": [{
                    "type": "KBaseGenomesCondensedPrototypeV2.Assembly",
                    "data": contig_set_dict,
                    "name": "%s_assembly" % (genome_id),
                    "provenance": assembly_provenance
                }]
            })
        except doekbase.workspace.client.ServerError as err:
            # str(err): concatenating the exception object itself raises
            # TypeError and hides the real server error.
            print("SAVE FAILED ON genome " + str(genome_id) + " ERROR: " +
                  str(err))
            raise
        except Exception:
            print("SAVE FAILED ON genome " + str(genome_id) +
                  " GENERAL_EXCEPTION: " + str(sys.exc_info()[0]))
            raise

        logger.info("Conversion completed.")
Esempio n. 3
0
    #                        help=script_details["Args"]["output_file_name"],
    #                        action='store', type=str, nargs='?', default=None, required=False)
    #    parser.add_argument('--shock_id',
    #                        help=script_details["Args"]["shock_id"],
    #                        action='store', type=str, nargs='?', default=None, required=False)
    #    parser.add_argument('--handle_id',
    #                        help=script_details["Args"]["handle_id"],
    #                        action='store', type=str, nargs='?', default=None, required=False)

    #    parser.add_argument('--input_mapping',
    #                        help=script_details["Args"]["input_mapping"],
    #                        action='store', type=unicode, nargs='?', default=None, required=False)

    args, unknown = parser.parse_known_args()

    logger = script_utils.stderrlogger(__file__)

    logger.debug(args)
    try:

        transform(
            shock_service_url=args.shock_service_url,
            #                  handle_service_url = args.handle_service_url,
            #                  output_file_name = args.output_file_name,
            input_fasta_directory=args.input_fasta_directory,
            #                  working_directory = args.working_directory,
            #                  shock_id = args.shock_id,
            #                  handle_id = args.handle_id,
            #                  input_mapping = args.input_mapping,
            wsname=args.wsname,
            wsurl=args.wsurl,
Esempio n. 4
0
#                        help=script_details["Args"]["output_file_name"],
#                        action='store', type=str, nargs='?', default=None, required=False)
#    parser.add_argument('--shock_id', 
#                        help=script_details["Args"]["shock_id"],
#                        action='store', type=str, nargs='?', default=None, required=False)
#    parser.add_argument('--handle_id', 
#                        help=script_details["Args"]["handle_id"], 
#                        action='store', type=str, nargs='?', default=None, required=False)

#    parser.add_argument('--input_mapping', 
#                        help=script_details["Args"]["input_mapping"], 
#                        action='store', type=unicode, nargs='?', default=None, required=False)

    args, unknown = parser.parse_known_args()

    logger = script_utils.stderrlogger(__file__)

    logger.debug(args)
    try:
    
        transform(shock_service_url = args.shock_service_url, 
#                  handle_service_url = args.handle_service_url, 
#                  output_file_name = args.output_file_name, 
                  input_fasta_directory = args.input_fasta_directory, 
#                  working_directory = args.working_directory, 
#                  shock_id = args.shock_id, 
#                  handle_id = args.handle_id,
#                  input_mapping = args.input_mapping,
                  wsname = args.wsname,
                  wsurl = args.wsurl,
#                  taxon_wsname = args.taxon_wsname,