def update_distance_matrix(config_dict, args):
    """Update the SNP distance matrix for strains that still need adding.

    Builds an ``SNPdb`` from ``config_dict``, connects to the database and
    asks it for the current strains.  When there is work to do, the matrix
    is either checked in this process (``args.hpc == 'N'``) or qsub jobs are
    written for batches of the new strains produced by ``chunks``.

    Args:
        config_dict: configuration mapping handed to ``SNPdb``; the
            ``'snpdb_name'`` key is read on the parallel path.
        args: parsed command-line arguments.  ``args.hpc`` is ``'N'`` for a
            serial run, otherwise it is converted with ``int()`` and passed
            to ``chunks``.

    Returns:
        None.  Progress is reported with ``print`` and the module logger.
    """
    log = logging.getLogger('snapperdb.snpdb.update_distance_matrix')
    log.info('Inititialising SnpDB Class')
    snpdb = SNPdb(config_dict)
    snpdb.parse_config_dict(config_dict)
    snpdb._connect_to_snpdb()

    log.info('Getting strains')
    strain_list, update_strain, all_strains = snpdb.get_strains()

    # Work exists only if the matrix already holds strains (or more than one
    # new strain is waiting) AND there is at least one strain to add.
    matrix_can_grow = all_strains or len(update_strain) > 1
    if not (matrix_can_grow and update_strain):
        print('### Nothing to update ' + str(datetime.datetime.now()))
        return

    # The downstream call expects a SNP cutoff; none applies here, so an
    # arbitrarily high value is supplied.
    snp_cutoff = '1000000'

    # NOTE: a parenthesised single-argument print behaves the same under the
    # Python 2 print statement used by this file.
    print("### Populating distance matrix: " + str(datetime.datetime.now()))
    snpdb.parse_args_for_update_matrix(snp_cutoff, strain_list)

    if args.hpc == 'N':
        print('### Launching serial update_distance_matrix '
              + str(datetime.datetime.now()))
        snpdb.check_matrix(strain_list, update_strain)
        snpdb.update_clusters()
        return

    print('### Launching parallel update_distance_matrix '
          + str(datetime.datetime.now()))
    # Strains already known, i.e. everything that is not waiting to be added.
    already_present = list(set(strain_list) - set(update_strain))
    batches = chunks(list(update_strain), int(args.hpc))
    for batch_number, batch in enumerate(batches):
        snpdb.write_qsubs_to_check_matrix(
            args,
            batch_number,
            batch,
            already_present,
            config_dict['snpdb_name'],
        )
    # The new strains are checked against each other in this process.
    snpdb.check_matrix(update_strain, update_strain)
def qsub_to_check_matrix(config_dict, args):
    """Check the distance matrix for strains listed in two text files.

    Builds an ``SNPdb`` from ``config_dict``, connects to the database, then
    reads one strain name per line from ``args.added_list`` and
    ``args.present_strains`` (surrounding whitespace stripped).  The union of
    both lists is passed to ``parse_args_for_update_matrix`` and the matrix
    is checked for the added strains against that union.

    Presumably this is what the qsub jobs written by
    ``write_qsubs_to_check_matrix`` invoke -- confirm against the job
    template.

    Args:
        config_dict: configuration mapping handed to ``SNPdb``.
        args: parsed command-line arguments providing the ``added_list`` and
            ``present_strains`` file paths.

    Returns:
        None.
    """
    snpdb = SNPdb(config_dict)
    snpdb.parse_config_dict(config_dict)
    snpdb._connect_to_snpdb()

    # Very large cutoff value, passed as a string to the update call.
    snp_cutoff = '1000000'

    with open(args.added_list) as added_handle:
        added_list = [line.strip() for line in added_handle]

    with open(args.present_strains) as present_handle:
        present_strains = [line.strip() for line in present_handle]

    # De-duplicate across both files before handing the names on.
    combined = set(present_strains) | set(added_list)
    strain_list = list(combined)

    snpdb.parse_args_for_update_matrix(snp_cutoff, strain_list)
    snpdb.check_matrix(strain_list, added_list)