コード例 #1
0
def get_sequencer_platform(dirn, instrument=None, settings=None):
    """
    Return the platform for the sequencing instrument

    Attempts to identify the platform (e.g. 'hiseq', 'miseq' etc)
    for a sequencing run.

    The lookup proceeds in up to three steps:

    1. If 'instrument' is not supplied then an attempt is made to
       extract the instrument name from the supplied directory
       name (failure here is logged as a warning, not raised).
    2. If an instrument name is available and 'settings' is
       supplied then the platform is looked up in the
       'sequencers' mapping of the settings; a match is returned
       immediately.
    3. Otherwise (no instrument, no settings, or the instrument
       is not listed) there is a final attempt to determine the
       platform from the hard-coded names in the
       'bcftbx.platforms' module.

    Progress messages are written to stdout; problems are
    reported via 'logging.warning'.

    Arguments:
      dirn (str): path to the data or analysis directory
      instrument (str): (optional) the instrument name
      settings (Settings):  (optional) a Settings instance
        with the configuration loaded

    Returns:
      String: either the platform or None, if the platform
        cannot be determined.
    """
    # Attempt to look up the instrument name
    if instrument is None:
        # Single-argument print(...) behaves identically under
        # Python 2 and Python 3
        print("Extracting instrument name from directory name")
        try:
            # Only the instrument field is used; the full unpack is
            # kept so an unexpected number of fields is still
            # caught by the handler below
            _datestamp, instrument, _run_number, \
                _flow_cell_prefix, _flow_cell_id = \
                    IlluminaData.split_run_name_full(dirn)
        except Exception as ex:
            # Deliberately best-effort: any failure just means the
            # fallback lookup further down is used instead
            logging.warning("Unable to extract instrument name: %s", ex)
    if instrument and settings:
        print("Identifying platform from instrument name")
        try:
            return settings.sequencers[instrument]
        except KeyError:
            # Instrument not listed in the settings
            logging.warning("Instrument name '%s' not found in "
                            "configuration file", instrument)
    # Fall back to old method
    print("Identifying platform from data directory name")
    platform = platforms.get_sequencer_platform(dirn)
    if platform is None:
        logging.warning("Unable to identify platform from directory name")
    return platform
コード例 #2
0
     sys.stderr.write("-i/-u can't be used for multiple projects\n")
     sys.exit(1)
 # Check metadata for the projects
 print "Checking metadata for projects"
 for p in projects:
     # Check for metadata file
     has_metadata_file = os.path.exists(p.info_file)
     if not has_metadata_file:
         # No metadata file
         print "%s: missing metadata file '%s'" % (p.name,p.info_file)
         if opts.init:
             print "Initialising metadata file"
             run_name = os.path.basename(os.path.abspath(args[0]))
             if run_name.endswith('_analysis'):
                 run_name = run_name[:-9]
             platform = platforms.get_sequencer_platform(run_name)
             p.info['run'] = run_name
             p.info['platform'] = platform
             p.info['samples'] = p.prettyPrintSamples()
             p.info['paired_end'] = run.paired_end
             p.info.save(p.info_file)
     else:
         # Check for metadata that is not assigned
         missing_data = []
         for attr in ('run','user','PI','samples','library_type','organism','platform'):
             if p.info[attr] is None:
                 missing_data.append(attr)
         if missing_data:
             print "%s: missing values for %s" % (p.name,', '.join(["'%s'" % x 
                                                                    for x in missing_data]))
         else:
コード例 #3
0
ファイル: rsync_seq_data.py プロジェクト: Honglongwu/genomics
              help="mirror the source directory at the destination (update files "
              "that have changed and remove any that have been deleted i.e. "
              "rsync --delete-after)")
 p.add_option('--no-log',action='store_true',dest="no_log",default=False,
              help="write rsync output directly stdout, don't create a log file")
 options,args = p.parse_args()
 if len(args) != 2:
     p.error("input is a source directory and a destination")
 # Locate source directory (and strip any trailing slash)
 data_dir = os.path.abspath(args[0].rstrip(os.sep))
 if not os.path.isdir(data_dir):
     logging.error("%s: doesn't exist or is not a directory" % data_dir)
     sys.exit(1)
 # Determine platform
 if options.platform is None:
     platform = platforms.get_sequencer_platform(data_dir)
     if platform is None:
         logging.error("Can't determine platform: use --platform option?")
         sys.exit(1)
 else:
     platform = options.platform
 # Work out the destination
 if options.year is None:
     year = time.strftime("%Y")
 else:
     year = options.year
 destination = os.path.join(args[1],year,platform)
 # Log file
 if not options.no_log:
     log_file = "rsync.%s.log" % os.path.split(data_dir)[-1]
 else:
コード例 #4
0
              help="mirror the source directory at the destination (update files "
              "that have changed and remove any that have been deleted i.e. "
              "rsync --delete-after)")
 p.add_option('--no-log',action='store_true',dest="no_log",default=False,
              help="write rsync output directly stdout, don't create a log file")
 options,args = p.parse_args()
 if len(args) != 2:
     p.error("input is a source directory and a destination")
 # Locate source directory (and strip any trailing slash)
 data_dir = os.path.abspath(args[0].rstrip(os.sep))
 if not os.path.isdir(data_dir):
     logging.error("%s: doesn't exist or is not a directory" % data_dir)
     sys.exit(1)
 # Determine platform
 if options.platform is None:
     platform = platforms.get_sequencer_platform(data_dir)
     if platform is None:
         logging.error("Can't determine platform: use --platform option?")
         sys.exit(1)
 else:
     platform = options.platform
 # Work out the destination
 if options.year is None:
     year = time.strftime("%Y")
 else:
     year = options.year
 destination = os.path.join(args[1],year,platform)
 # Log file
 if not options.no_log:
     log_file = "rsync.%s.log" % os.path.split(data_dir)[-1]
 else: