def connect():
    """Connect to the PostgreSQL database server.

    Returns:
        An open psycopg2 connection on success, or ``None`` when the
        connection attempt fails (the error is reported, not raised).
    """
    conn = None
    # Read connection parameters
    params = config()
    # Connect to the PostgreSQL server
    print_color("Connecting to the PostgreSQL database...", "HEADER")
    try:
        conn = psycopg2.connect(**params)
    except (Exception, psycopg2.DatabaseError) as error:
        # BUG FIX: the original used `finally: return conn`, which silently
        # suppressed every exception raised by psycopg2.connect (a `return`
        # in `finally` discards the in-flight exception). Report the failure
        # instead; callers still receive None on error, as before.
        print_color(error, "FAIL")
    return conn
def run_commands(conn, commands):
    """Execute a sequence of SQL statements on an open PostgreSQL connection.

    All statements run on one cursor; the transaction is committed once,
    after every statement has executed.
    """
    cursor = conn.cursor()
    print_color("Running commands...", "WARNING")
    # Execute each statement in order on the shared cursor.
    for statement in commands:
        cursor.execute(statement)
    print_color("Commands ran successfully!", "OKGREEN")
    # Close communication with the PostgreSQL database server,
    # then commit the whole batch as a single transaction.
    cursor.close()
    conn.commit()
def start(self):
    """Run the integration step, then disease analysis when enabled.

    Iterates the configured disease ids and runs self.disease() on the
    first id that has a background file in the disease database.
    """
    self.integrate()
    if self.args['disease'] == 'Y':
        find_disease_id = False
        for disease_id in self.args['disease_ids'].split(';'):
            # Probe for a background file for this disease id. The format
            # map merges instance attributes with locals() so that both
            # {disease_db} (attribute) and {disease_id} (loop variable)
            # resolve in the same template.
            if glob.glob(
                    '{disease_db}/Disease_BackGround/{disease_id}__*txt'.
                    format(**dict(self.__dict__, **locals()))):
                find_disease_id = True
                utils.print_color('> Disease Analysis', 'white')
                # Only the first matching disease id is analysed.
                self.disease(disease_id)
                break
        if not find_disease_id:
            # NOTE(review): the warning reports disease_name while the loop
            # searched disease_ids — confirm these always correspond.
            print '[warn] disease analysis will not to do, cause the disease "{}" not in database yet'.format(
                self.args['disease_name'])
def _log_graph():
    """Dump the current TF default graph to /tmp/interpreter for TensorBoard.

    Prints the absolute path of the event directory so the user can point
    `tensorboard --logdir` at it.
    """
    path = '/tmp/interpreter'
    with tf.Session() as sess:
        # BUG FIX: tf.global_variables_initializer() only *builds* the init
        # op; it must be passed to sess.run() to actually initialize.
        sess.run(tf.global_variables_initializer())
        # BUG FIX: close the writer so the event file is flushed to disk.
        writer = tf.summary.FileWriter(path, sess.graph)
        writer.close()
        ut.print_color(os.path.abspath(path), color=33)
'cars', 'columns': [{ 'name': 'id', 'type': 'BIGSERIAL', 'constraints': ['PRIMARY KEY'] }, { 'name': 'make', 'type': 'VARCHAR(50)', 'constraints': ['NOT NULL'] }, { 'name': 'model', 'type': 'VARCHAR(50)', 'constraints': ['NOT NULL'] }, { 'name': 'year', 'type': 'INTERVAL YEAR', 'constraints': ['NOT NULL'] }] }] drop_cmds = parse_commands(conn, drop_pairings) create_cmds = parse_commands(conn, create_pairings) run_commands(conn, drop_cmds) run_commands(conn, create_cmds) # ------------------------------------------------------ disconnect(conn) except (Exception, psycopg2.DatabaseError) as error: print_color(error, "FAIL")
def start(self):
    """Assemble the ReleaseResult tree and build the report context.

    Release layout produced:
        {job}/ReleaseResult
        ├── Data
        │   ├── BamData
        │   └── RawData
        ├── FinalResult
        │   ├── 1.FilterDB-...
        │   ├── 2. ...
        ├── PrimaryAnalysis
        │   ├── FilterAnalysis
        │   └── SampleVariation
        └── Readme

    Each enabled analysis releases its files and records an entry in
    self.context; final-result sections are numbered via
    self.final_result_counter, top-level sections via Chinese ordinals.
    """
    # Chinese ordinals (一..十) used to number the top-level report sections.
    dir_counter = iter('一 二 三 四 五 六 七 八 九 十'.split())
    # Display names (and resolved paths) for each release section; the
    # Chinese suffixes are user-facing labels in the generated report.
    dir_map = {
        'CandidateGene': 'CandidateGene-候选基因列表',
        'FilterDB': 'FilterDB-突变位点筛选',
        'ACMG': 'ACMG-突变位点有害性分类',
        'FilterSV_CNV': 'FilterSV_CNV-结构变异有害性分析',
        'Noncoding': 'Noncoding-非编码区突变位点筛选',
        'ModelF': 'ModelF-基于家系样本筛选',
        'Share': 'Share-共有突变基因筛选',
        'Denovo': 'Denovo-新生突变筛选',
        'Linkage': 'Linkage-连锁分析',
        'ROH': 'ROH-纯合子区域分析',
        'Network': 'Network-候选基因相关性排序',
        'Pathway': 'Pathway-候选基因富集分析',
        'PPI': 'PPI-蛋白相互作用分析',
        'SiteAS': 'SiteAS-基于位点关联分析',
        'GeneAS': 'GeneAS-基于基因的关联分析',
        # NOTE(review): 'BriefResults' appears twice with identical values;
        # the second literal silently overrides the first. Harmless here,
        # but one of the two should be removed.
        'BriefResults':
        '{analydir}/Advance/{newjob}/BriefResults'.format(**self.__dict__),
        'BriefResults':
        '{analydir}/Advance/{newjob}/BriefResults'.format(**self.__dict__),
        'Data': '{outdir}/ReleaseResult/Data'.format(**self.__dict__),
        'PrimaryAnalysis':
        '{outdir}/ReleaseResult/PrimaryAnalysis'.format(**self.__dict__),
        'FinalResult':
        '{outdir}/ReleaseResult/FinalResult'.format(**self.__dict__),
        'Readme': '{outdir}/ReleaseResult/Readme/'.format(**self.__dict__),
    }
    # Expose every mapped name as an instance attribute so the
    # '{Name}'.format(**self.__dict__) lookups below resolve.
    self.__dict__.update(**dir_map)
    # print self.ANALY_DICT.keys()
    # exit()
    utils.print_color('release {} samples:\n{}'.format(
        len(self.qc_lists), self.qc_lists.keys()))
    # RawData — always released.
    if True:
        self.release_fastq('raw')
        self.context['raw_data'] = dir_counter.next()
    # QC
    if self.ANALY_DICT['quality_control_keep_clean']:
        self.release_fastq('clean')
        self.context['clean_data'] = dir_counter.next()
    # Mapping
    if self.ANALY_DICT['mapping']:
        self.release_mapping()
        self.context['bam_data'] = dir_counter.next()
    # SNP/INDEL — 'primary_result' gets its ordinal only once, on the
    # first primary analysis that runs (same pattern for SV/CNV below).
    if self.ANALY_DICT['snpindel_call']:
        self.release_snp_indel('snp')
        self.release_snp_indel('indel')
        if 'primary_result' not in self.context:
            self.context['primary_result'] = dir_counter.next()
        self.context['snpindel_result'] = True
    # SV
    if self.ANALY_DICT['sv_call']:
        self.release_sv_cnv('sv')
        if 'primary_result' not in self.context:
            self.context['primary_result'] = dir_counter.next()
        self.context['sv_result'] = True
    # CNV
    if self.ANALY_DICT['cnv_call']:
        self.release_sv_cnv('cnv')
        if 'primary_result' not in self.context:
            self.context['primary_result'] = dir_counter.next()
        self.context['cnv_result'] = True
    # Circos — requires both freec CNV calls and SV calls.
    if self.ANALY_DICT['cnv_call_freec'] and self.ANALY_DICT['sv_call']:
        self.release_circos()
        self.context['circos_result'] = True
    # Advance: candidate gene list is produced whenever any of the
    # downstream filtering analyses is enabled.
    if any(
        (self.ANALY_DICT['filter_acmg'], self.ANALY_DICT['filter_model'],
         self.ANALY_DICT['share_compare'], self.ANALY_DICT['denovo'])):
        self.release_candidate_gene()
        self.context['candidate_gene'] = {
            'name': '{CandidateGene}'.format(**self.__dict__)
        }
        self.context['candidate_gene'].update(
            {'counter': self.final_result_counter})
    # FilterDB
    if self.ANALY_DICT['filter_db'] or self.ANALY_DICT['filter_acmg']:
        self.final_result_counter += 1
        self.release_filter_db('snp')
        self.release_filter_db('indel')
        if 'filter_analysis' not in self.context:
            self.context['filter_analysis'] = dir_counter.next()
        self.context['filterdb'] = {
            'name': '{FilterDB}'.format(**self.__dict__)
        }
        self.context['filterdb'].update(
            {'counter': self.final_result_counter})
    # ACMG
    if self.ANALY_DICT['filter_acmg']:
        self.final_result_counter += 1
        self.release_acmg()
        self.context['filter_acmg'] = {
            'name': '{ACMG}'.format(**self.__dict__)
        }
        self.context['filter_acmg'].update(
            {'counter': self.final_result_counter})
    # FilterSV/FilterCNV
    if self.ANALY_DICT['filter_sv'] or self.ANALY_DICT['filter_cnv']:
        self.final_result_counter += 1
        if 'filter_analysis' not in self.context:
            self.context['filter_analysis'] = dir_counter.next()
        self.context['filter_sv_cnv'] = {
            'name': '{FilterSV_CNV}'.format(**self.__dict__)
        }
        self.context['filter_sv_cnv'].update(
            {'counter': self.final_result_counter})
        if self.ANALY_DICT['filter_sv']:
            self.release_filter_sv_cnv('sv')
        if self.ANALY_DICT['filter_cnv']:
            self.release_filter_sv_cnv('cnv')
    # Noncoding
    if self.ANALY_DICT['filter_noncoding']:
        self.final_result_counter += 1
        self.release_filter_noncoding()
        self.context['filter_noncoding'] = {
            'name': '{Noncoding}'.format(**self.__dict__)
        }
        self.context['filter_noncoding'].update(
            {'counter': self.final_result_counter})
    # ModelF — skipped when share_compare runs (it covers this output).
    if self.ANALY_DICT[
            'filter_model'] and not self.ANALY_DICT['share_compare']:
        self.final_result_counter += 1
        self.release_filter_model()
        self.context['filter_model'] = {
            'name': '{ModelF}'.format(**self.__dict__)
        }
        self.context['filter_model'].update(
            {'counter': self.final_result_counter})
    # Share
    if self.ANALY_DICT['share_compare']:
        self.final_result_counter += 1
        self.release_share_compare()
        self.context['share_compare'] = {
            'name': '{Share}'.format(**self.__dict__)
        }
        self.context['share_compare'].update(
            {'counter': self.final_result_counter})
    # Denovo — context entry is always written; files are only released
    # for the sub-analyses that were actually enabled.
    if self.ANALY_DICT['denovo']:
        self.final_result_counter += 1
        self.context['denovo'] = {
            'name': '{Denovo}'.format(**self.__dict__)
        }
        self.context['denovo'].update(
            {'counter': self.final_result_counter})
        if any((self.ANALY_DICT['denovo_samtools'],
                self.ANALY_DICT['denovo_triodenovo'],
                self.ANALY_DICT['denovo_denovogear'])):
            self.release_denovo()
        if self.ANALY_DICT['denovo_sv']:
            self.release_denovo_sv_cnv('sv')
        if self.ANALY_DICT['denovo_cnv']:
            self.release_denovo_sv_cnv('cnv')
    # Linkage
    if self.ANALY_DICT['linkage']:
        self.final_result_counter += 1
        self.release_linkage()
        self.context['linkage'] = {
            'name': '{Linkage}'.format(**self.__dict__)
        }
        self.context['linkage'].update(
            {'counter': self.final_result_counter})
    # ROH
    if self.ANALY_DICT['roh']:
        self.final_result_counter += 1
        self.release_roh()
        self.context['roh'] = {'name': '{ROH}'.format(**self.__dict__)}
        self.context['roh'].update({'counter': self.final_result_counter})
    # Network
    if self.ANALY_DICT['phenolyzer']:
        self.final_result_counter += 1
        self.release_network()
        self.context['network'] = {
            'name': '{Network}'.format(**self.__dict__)
        }
        self.context['network'].update(
            {'counter': self.final_result_counter})
    # Pathway
    if self.ANALY_DICT['pathway']:
        self.final_result_counter += 1
        self.release_pathway()
        self.context['pathway'] = {
            'name': '{Pathway}'.format(**self.__dict__)
        }
        self.context['pathway'].update(
            {'counter': self.final_result_counter})
    # PPI
    if self.ANALY_DICT['ppi']:
        self.final_result_counter += 1
        self.release_ppi()
        self.context['ppi'] = {'name': '{PPI}'.format(**self.__dict__)}
        self.context['ppi'].update({'counter': self.final_result_counter})
    # SiteAS — release call currently disabled; only context is recorded.
    if self.ANALY_DICT['site_association']:
        self.final_result_counter += 1
        # self.release_site_as()
        self.context['site_as'] = {
            'name': '{SiteAS}'.format(**self.__dict__)
        }
        self.context['site_as'].update(
            {'counter': self.final_result_counter})
    # GeneAS — release call currently disabled; only context is recorded.
    if self.ANALY_DICT['gene_association']:
        self.final_result_counter += 1
        # self.release_gene_as()
        self.context['gene_as'] = {
            'name': '{GeneAS}'.format(**self.__dict__)
        }
        self.context['gene_as'].update(
            {'counter': self.final_result_counter})
    # Closing sections take the next two ordinals.
    self.context['final_result'] = dir_counter.next()
    self.context['appendix'] = dir_counter.next()
    # Readme
    self.make_readme()
def start(self):
    """Validate inputs, resolve run parameters and launch each analysis stage.

    Mutates self.args with resolved directories, queues, analysis flags
    and sample metadata, then instantiates and starts every enabled
    pipeline stage (QC, Mapping, Mutation, ... Result, Report) in order.
    """
    # temp — canonical advance-analysis output dirs, keyed by stage name.
    advance_dirs = {
        'Merged_vcf': '{analydir}/Advance/{newjob}/Merged_vcf',
        'ACMG': '{analydir}/Advance/{newjob}/ACMG',
        'FilterSV': '{analydir}/Advance/{newjob}/FilterSV',
        'FilterCNV': '{analydir}/Advance/{newjob}/FilterCNV',
        'Noncoding': '{analydir}/Advance/{newjob}/Noncoding',
        'ModelF': '{analydir}/Advance/{newjob}/ModelF',
        'Share': '{analydir}/Advance/{newjob}/Share',
        'Denovo': '{analydir}/Advance/{newjob}/Denovo',
        'Linkage': '{analydir}/Advance/{newjob}/Linkage',
        'ROH': '{analydir}/Advance/{newjob}/ROH',
        'Network': '{analydir}/Advance/{newjob}/Network',
        'Pathway': '{analydir}/Advance/{newjob}/Pathway',
        'PPI': '{analydir}/Advance/{newjob}/PPI',
        'HLA': '{analydir}/Advance/{newjob}/HLA',
        'SiteAS': '{analydir}/Advance/{newjob}/SiteAS',
        'GeneAS': '{analydir}/Advance/{newjob}/GeneAS',
        'IntegrateResult': '{analydir}/Advance/{newjob}/IntegrateResult',
        'Disease': '{analydir}/Advance/{newjob}/Disease',
        'BriefResults': '{analydir}/Advance/{newjob}/BriefResults',
    }
    # Resolve each template against self.args and store it back.
    for k, v in advance_dirs.iteritems():
        self.args.update({k: v.format(**self.args)})
    # print self.args['SiteAS']
    # exit()
    # print self.analy_array
    print 'hello, {}'.format(self.username)
    # Require rawdata or not
    qc_status = utils.get_status('qc', self.startpoint,
                                 config.ANALYSIS_POINTS)
    mapping_status = utils.get_status('bwa_mem', self.startpoint,
                                      config.ANALYSIS_POINTS)
    print 'qc status:', qc_status
    print 'mapping status:', mapping_status
    # Map the numeric analysis codes onto named boolean flags.
    ANALY_DICT = utils.get_analysis_dict(self.analy_array,
                                         config.ANALYSIS_CODE)
    self.args.update({'ANALY_DICT': ANALY_DICT})
    # print ANALY_DICT.keys();exit()
    softwares = utils.get_softwares(self.analy_array,
                                    self.args['ANALY_DICT'], self.args,
                                    self.seqstrag)
    # pprint(softwares);exit()
    self.args.update({'softwares': softwares})
    # check inputs
    self.queues = utils.check_queues(self.queues, self.username)
    self.args.update({'queues': self.queues})
    # use sentieon specific queues if needed
    if 'sentieon' in softwares.values():
        print 'add sentieon_queues'
        sentieon_queues = self.queues
        if config.CONFIG.has_option('resource', 'sentieon_queues'):
            sentieon_queues = config.CONFIG.get(
                'resource', 'sentieon_queues').split(',')
            # Fall back to the general queues when none of the configured
            # sentieon queues is usable for this user.
            sentieon_queues = utils.check_queues(sentieon_queues,
                                                 self.username)
            if not sentieon_queues:
                sentieon_queues = self.queues
        self.args.update({'sentieon_queues': sentieon_queues})
    # print self.args['sentieon_queues'];exit()
    # print sentieon_queues;exit()
    utils.check_analy_array(self.seqstrag, self.analy_array,
                            config.ANALYSIS_CODE)
    utils.check_files(self.pn, self.samp_info, self.samp_list)
    newTR = utils.check_target_region(config.CONFIG, self.seqstrag,
                                      self.refgenome, self.rawTR)
    self.args.update({'TR': newTR})
    print 'analysis items:'
    for analysis_code in self.analy_array:
        print utils.color_text(
            '{:4} {}'.format(analysis_code,
                             config.ANALYSIS_CODE[analysis_code][0]),
            'yellow')
    # Analysis start point
    if self.startpoint:
        if self.startpoint in config.ANALYSIS_POINTS:
            print 'start point: {}'.format(
                utils.color_text(self.startpoint))
        else:
            # Unknown start point: suggest the closest match, then abort.
            print '[error] invalid startpoint: {}'.format(
                utils.color_text(self.startpoint))
            print 'maybe you want to choose: {}'.format(
                utils.color_text(
                    process.extractOne(self.startpoint,
                                       config.ANALYSIS_POINTS.keys())[0],
                    'cyan'))
            print 'available startpoints are as follows:\n {}'.format(
                ' '.join(config.ANALYSIS_POINTS.keys()))
            exit(1)
    # Codes above 6.1 belong to the advance analyses.
    is_advance = max(self.analy_array) > 6.1
    project = utils.Project(self.analydir, self.samp_info,
                            self.samp_info_done, self.samp_list,
                            self.qc_list, qc_status, mapping_status,
                            is_advance)
    # Extract sample_info
    print 'extract sample informations...'
    fenqi, tissue, disease_name, sample_infos, sample_infos_all, sample_done = project.get_sample_infos(
        self.samp_list, self.samp_info, self.samp_info_done, is_advance)
    database = '{}/project/DisGeNet.json'.format(
        config.CONFIG.get('software', 'soft_dir'))
    disease_ids = utils.get_disease_id(disease_name, database)
    self.args.update({
        'disease_name': disease_name,
        'disease_ids': disease_ids,
    })
    # Samples still waiting = all samples minus those already finished.
    sample_infos_waiting = {
        sampleid: infos
        for sampleid, infos in sample_infos.iteritems()
        if sampleid not in sample_done
    }
    self.args.update({'sample_infos_waiting': sample_infos_waiting})
    # print sample_infos_waiting
    # exit()
    # print 'fenqi:', fenqi
    # print 'tissue:', tissue
    # exit()
    sample_lists = project.get_sample_lists
    # print sample_lists
    # print sample_infos.keys()
    # print sample_infos_all.keys()
    # for sample in sample_infos:
    #     print sample, sample_infos[sample]['familyid']
    # exit()
    if mapping_status == 'waiting':
        sample_lists = project.update_qc_list()
    print ' report number: {}'.format(utils.color_text(fenqi))
    if disease_name:
        print ' disease name: {}'.format(utils.color_text(disease_name))
        print ' disease id: {}'.format(utils.color_text(disease_ids))
    if tissue:
        print ' tissue: {}'.format(utils.color_text(tissue))
    print ' samples ({}): {}'.format(
        len(sample_infos), utils.color_text(sample_infos.keys()))
    if sample_done:
        print ' samples done({}): {}'.format(
            len(sample_done), utils.color_text(sample_done))
    # Update qc_list and extract sample_list
    # print 'update qc_list...'
    # print json.dumps(sample_lists, indent=2)
    # set memory according seqstrag — WGS gets larger per-step memory.
    print 'set analysis memory...'
    if self.seqstrag == 'WGS':
        print 'upate memory for WGS...'
        for analysis, memory in config.ANALYSIS_MEM_WGS.items():
            if analysis in config.ANALYSIS_POINTS:
                config.ANALYSIS_POINTS[analysis][0] = memory
    # exit()
    # ===========================================================
    # ===========================================================
    print '>>> pipeline start...'
    mutation_soft, sv_soft, cnv_soft, denovo_soft = [
        softwares[each] for each in ('mutation', 'sv', 'cnv', 'denovo')
    ]
    print ' mutation_soft:{}, sv_soft:{}, cnv_soft:{}, denovo_soft:{}'.format(
        mutation_soft, sv_soft, cnv_soft, denovo_soft)
    # QC — only when raw data still needs processing.
    if ANALY_DICT['quality_control'] and qc_status == 'waiting':
        utils.print_color('> QC', 'white')
        QC(self.args, self.jobs, self.orders, sample_lists,
           config).start()
    # Mapping
    if ANALY_DICT['mapping']:
        utils.print_color('> Mapping', 'white')
        Mapping(self.args, self.jobs, self.orders, sample_lists,
                sample_infos, config, qc_status, mapping_status).start()
    # Mutation
    if ANALY_DICT['snpindel_call']:
        utils.print_color('> Mutation', 'white')
        Mutation(self.args, self.jobs, self.orders, sample_lists,
                 sample_infos, config).start()
    # SV
    if ANALY_DICT['sv_call']:
        utils.print_color('> SV', 'white')
        SV(self.args, self.jobs, self.orders, sample_infos,
           config).start()
    # CNV
    if ANALY_DICT['cnv_call']:
        utils.print_color('> CNV', 'white')
        CNV(self.args, self.jobs, self.orders, sample_infos,
            config).start()
    # FilterDB
    if ANALY_DICT['filter']:
        utils.print_color('> FilterDB', 'white')
        FilterDB(self.args, self.jobs, self.orders, mutation_soft,
                 sv_soft, cnv_soft, sample_infos, config, disease_name,
                 tissue, ANALY_DICT).start()
    # ModelF
    if ANALY_DICT['filter_model']:
        utils.print_color('> Model', 'white')
        FilterModel(self.args, self.jobs, self.orders, mutation_soft,
                    sv_soft, cnv_soft, sample_infos, config).start()
    # Denovo
    if ANALY_DICT['denovo']:
        utils.print_color('> Denovo', 'white')
        Denovo(self.args, self.jobs, self.orders, mutation_soft, sv_soft,
               cnv_soft, denovo_soft, sample_infos, config,
               ANALY_DICT).start()
    # Linkage
    if ANALY_DICT['linkage']:
        utils.print_color('> Linkage', 'white')
        Linkage(self.args, self.jobs, self.orders, mutation_soft,
                sv_soft, cnv_soft, denovo_soft, sample_infos_all, config,
                ANALY_DICT).start()
    # IntegrateResult
    if any(ANALY_DICT[analysis]
           for analysis in
           ['filter', 'filter_model', 'denovo', 'phenolyzer']):
        utils.print_color('> IntegrateResult', 'white')
        IntegrateResult(self.args, self.jobs, self.orders,
                        config).start()
    # ROH
    if ANALY_DICT['roh']:
        utils.print_color('> ROH', 'white')
        ROH(self.args, self.jobs, self.orders, sample_infos,
            mutation_soft, config).start()
    # OTHER
    other = Other(self.args, self.jobs, self.orders, config,
                  disease_name)
    # IBD — needs at least two samples still waiting to compare.
    if any(ANALY_DICT[each]
           for each in ['filter_model', 'linkage', 'denovo'
                        ]) and len(sample_infos_waiting) > 1:
        utils.print_color('> IBD', 'white')
        other.ibd()
    # Network
    if ANALY_DICT['phenolyzer']:
        utils.print_color('> Phenolyzer', 'white')
        other.phenolyzer()
    # Pathway
    if ANALY_DICT['pathway']:
        utils.print_color('> Pathway', 'white')
        other.pathway()
    # PPI
    if ANALY_DICT['ppi']:
        utils.print_color('> PPI', 'white')
        other.ppi()
    # SiteAS
    if ANALY_DICT['site_association']:
        utils.print_color('> SiteAS', 'white')
        Association(self.args, self.jobs, self.orders,
                    config).site_association()
    # GeneAS
    if ANALY_DICT['gene_association']:
        utils.print_color('> GeneAS', 'white')
        Association(self.args, self.jobs, self.orders,
                    config).gene_association()
    # HLA
    if ANALY_DICT['hla']:
        utils.print_color('> HLA', 'white')
        HLA(self.args, self.jobs, self.orders, sample_lists,
            sample_infos, config, qc_status).start()
    # result and report — always generated.
    utils.print_color('> Result', 'white')
    Result(self.args, self.jobs, self.orders, config).start()
    utils.print_color('> Report', 'white')
    Report(self.args, self.jobs, self.orders, config).start()
    # job summary
    print 'lenght of jobs waiting/total: {}/{}'.format(
        len([job for job in self.jobs if job.get('status') == 'waiting']),
        len(self.jobs))
    utils.write_job(self.analydir, self.newjob, self.jobs, self.orders)
    print '{:-^80}'.format(' all done ')