Exemple #1
0
 def test_setup_analysis_directory_structure(self, mock_id, mock_parse,
                                             mock_path, mock_makedir):
     # Checks that the mapping returned by setup_analysis_directory_structure
     # holds, under the expected data path, an object whose ``name`` is the
     # project name supplied through ``mock_parse``.
     # NOTE(review): the mock_* arguments are presumably injected by patch
     # decorators outside this view; mock_path and mock_makedir are not
     # referenced in this body.
     flowcell_path = '/ngi2016003/201103_A00187_0332_AHFCFLDSXX'
     wanted_name = 'S.One_20_01'
     result_key = (
         '/lupus/ngi/staging/wildwest/ngi2016001/nobackup/NGI/DATA/P12345')

     # Build the stubbed parse result from the inside out.
     sample_entry = {'sample_name': 'one'}
     project_entry = {
         'project_name': 'S.One_20_01',
         'project_original_name': 'something',
         'samples': [sample_entry],
     }
     mock_parse.return_value = {
         'fc_dir': flowcell_path,
         'fc_full_id': '201103_A00187_0332_AHFCFLDSXX',
         'projects': [project_entry],
     }
     mock_id.return_value = 'P12345'

     # Start from an empty project mapping and skip file creation.
     collected = setup_analysis_directory_structure(flowcell_path,
                                                    {},
                                                    create_files=False)
     self.assertEqual(wanted_name, collected[result_key].name)
def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         force_update=False, workflow="NGI", already_parsed=False,
         config=None, config_file_path=None):
    """Collect projects from flowcell directories and register each of them.

    Every collected project is passed to create_charon_entries_from_project.
    An exception raised for one project is printed to stderr and the loop
    continues with the next project.

    Args:
        demux_fcid_dirs: Iterable of directory path strings; duplicates are
            dropped before processing.
        restrict_to_projects: Forwarded to
            setup_analysis_directory_structure; a falsy value becomes [].
        restrict_to_samples: Handled the same way as restrict_to_projects.
        force_update: When truthy, replaced by the result of
            validate_force_update() and then passed on as force_overwrite.
        workflow: Forwarded to create_charon_entries_from_project.
        already_parsed: When truthy, each directory is handed to
            recreate_project_from_filesystem instead of
            setup_analysis_directory_structure.
        config: Forwarded to setup_analysis_directory_structure.
        config_file_path: Not referenced in this function body.

    Raises:
        SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = {}

    if already_parsed:  # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            project = recreate_project_from_filesystem(demux_fcid_dir)
            projects_to_analyze[project.name] = project
    else:  # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            # The returned mapping is fed back in so projects accumulate
            # across flowcells.
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir,
                projects_to_analyze,
                restrict_to_projects,
                restrict_to_samples,
                create_files=False,
                config=config)

    if not projects_to_analyze:
        # Sorted so the message is reproducible (set order is arbitrary).
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(sorted(demux_fcid_dirs_set))))

    # Only the project objects are needed from here on, not their keys.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(project, workflow=workflow,
                                               force_overwrite=force_update)
        except Exception as e:
            # Best effort: report the failure and move on to the next project.
            print(e, file=sys.stderr)
Exemple #3
0
def main(demux_fcid_dirs,
         restrict_to_projects=None,
         restrict_to_samples=None,
         best_practice_analysis=None,
         sequencing_facility=None,
         already_parsed=False,
         force_update=False,
         delete_existing=False,
         force_create_project=False,
         config=None,
         config_file_path=None):
    """Collect projects from flowcell directories and register each of them.

    Every collected project is passed to create_charon_entries_from_project.
    An exception raised for one project is printed to stderr and the loop
    continues with the next project.

    Args:
        demux_fcid_dirs: Iterable of directory path strings; duplicates are
            dropped before processing.
        restrict_to_projects: Forwarded to
            setup_analysis_directory_structure; a falsy value becomes [].
        restrict_to_samples: Handled the same way as restrict_to_projects.
        best_practice_analysis: Forwarded to
            create_charon_entries_from_project.
        sequencing_facility: Forwarded to
            create_charon_entries_from_project.
        already_parsed: When truthy, each directory is handed to
            recreate_project_from_filesystem instead of
            setup_analysis_directory_structure.
        force_update: When truthy, replaced by the result of
            validate_force_update() and then passed on as force_overwrite.
        delete_existing: When truthy, replaced by the result of
            validate_delete_existing() before being passed on.
        force_create_project: Forwarded to recreate_project_from_filesystem.
        config: Forwarded to setup_analysis_directory_structure.
        config_file_path: Not referenced in this function body.

    Raises:
        SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    if delete_existing:
        delete_existing = validate_delete_existing()
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    projects_to_analyze = {}

    if already_parsed:  # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            project = recreate_project_from_filesystem(
                demux_fcid_dir, force_create_project=force_create_project)
            projects_to_analyze[project.name] = project
    else:  # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            # The returned mapping is fed back in so projects accumulate
            # across flowcells.
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir,
                projects_to_analyze,
                restrict_to_projects,
                restrict_to_samples,
                config=config)

    if not projects_to_analyze:
        # Sorted so the message is reproducible (set order is arbitrary).
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(sorted(demux_fcid_dirs_set))))

    # Only the project objects are needed from here on, not their keys.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(
                project,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility,
                force_overwrite=force_update,
                delete_existing=delete_existing)
        except Exception as e:
            # Best effort: report the failure and move on to the next project.
            print(e, file=sys.stderr)
def main(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None,
         best_practice_analysis=None, sequencing_facility=None,
         already_parsed=False,
         force_update=False, delete_existing=False,
         force_create_project=False,
         config=None, config_file_path=None):
    """Collect projects from flowcell directories and register each of them.

    Every collected project is passed to create_charon_entries_from_project.
    An exception raised for one project is printed to stderr and the loop
    continues with the next project.

    Args:
        demux_fcid_dirs: Iterable of directory path strings; duplicates are
            dropped before processing.
        restrict_to_projects: Forwarded to
            setup_analysis_directory_structure; a falsy value becomes [].
        restrict_to_samples: Handled the same way as restrict_to_projects.
        best_practice_analysis: Forwarded to
            create_charon_entries_from_project.
        sequencing_facility: Forwarded to
            create_charon_entries_from_project.
        already_parsed: When truthy, each directory is handed to
            recreate_project_from_filesystem instead of
            setup_analysis_directory_structure.
        force_update: When truthy, replaced by the result of
            validate_force_update() and then passed on as force_overwrite.
        delete_existing: When truthy, replaced by the result of
            validate_delete_existing() before being passed on.
        force_create_project: Forwarded to recreate_project_from_filesystem.
        config: Forwarded to setup_analysis_directory_structure.
        config_file_path: Not referenced in this function body.

    Raises:
        SystemExit: If no projects were collected.
    """
    if force_update:
        force_update = validate_force_update()
    if delete_existing:
        delete_existing = validate_delete_existing()
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    projects_to_analyze = {}

    if already_parsed:  # Starting from Project/Sample/Libprep/Seqrun tree format
        for demux_fcid_dir in demux_fcid_dirs_set:
            project = recreate_project_from_filesystem(
                demux_fcid_dir, force_create_project=force_create_project)
            projects_to_analyze[project.name] = project
    else:  # Raw illumina flowcell
        for demux_fcid_dir in demux_fcid_dirs_set:
            # The returned mapping is fed back in so projects accumulate
            # across flowcells.
            projects_to_analyze = setup_analysis_directory_structure(
                demux_fcid_dir,
                projects_to_analyze,
                restrict_to_projects,
                restrict_to_samples,
                config=config)

    if not projects_to_analyze:
        # Sorted so the message is reproducible (set order is arbitrary).
        sys.exit("Quitting: no projects found to process in flowcells {} "
                 "or there was an error gathering required "
                 "information.".format(",".join(sorted(demux_fcid_dirs_set))))

    # Only the project objects are needed from here on, not their keys.
    for project in projects_to_analyze.values():
        try:
            create_charon_entries_from_project(
                project,
                best_practice_analysis=best_practice_analysis,
                sequencing_facility=sequencing_facility,
                force_overwrite=force_update,
                delete_existing=delete_existing)
        except Exception as e:
            # Best effort: report the failure and move on to the next project.
            print(e, file=sys.stderr)
 if args.delete_existing: args.delete_existing = validate_delete_existing()
 if not args.restrict_to_projects: args.restrict_to_projects = []
 if not args.restrict_to_samples: args.restrict_to_samples = []
 organize_fc_dirs_set = set(args.organize_fc_dirs)
 projects_to_analyze = dict()
 ## NOTE: this bit of code is not currently in use but could be used later
 #if args.already_parsed: # Starting from Project/Sample/Libprep/Seqrun tree format
 #    for organize_fc_dir in organize_fc_dirs_set:
 #        p = recreate_project_from_filesystem(organize_fc_dir,
 #                                             force_create_project=args.force_create_project)
 #        projects_to_analyze[p.name] = p
 #else: # Raw illumina flowcell
 for organize_fc_dir in organize_fc_dirs_set:
     projects_to_analyze = setup_analysis_directory_structure(fc_dir=organize_fc_dir,
                                                              projects_to_analyze=projects_to_analyze,
                                                              restrict_to_projects=args.restrict_to_projects,
                                                              restrict_to_samples=args.restrict_to_samples,
                                                              fallback_libprep=args.fallback_libprep,
                                                              quiet=args.quiet)
 if not projects_to_analyze:
     raise ValueError('No projects found to process in flowcells '
                      '"{}" or there was an error gathering required '
                      'information.'.format(",".join(organize_fc_dirs_set)))
 else:
     projects_to_analyze = projects_to_analyze.values()
     for project in projects_to_analyze:
         try:
             create_charon_entries_from_project(project,
                                                best_practice_analysis=args.best_practice_analysis,
                                                sequencing_facility=args.sequencing_facility,
                                                force_overwrite=args.force_update,
                                                delete_existing=args.delete_existing)