def plots(l_populations):
    """Concatenate per-population files into ``agv.<suffix>`` and plot them.

    For each suffix listed below, the files ``<population>.<suffix>`` of all
    populations are concatenated into ``agv.<suffix>`` in the current working
    directory. A suffix is skipped (and reported) if the file is missing for
    any population. Files with a header line contribute that header once
    (taken from the first population); 'fam' and 'sampleQC.samples' files are
    treated as header-less and copied verbatim.

    After concatenation the sample-level plotting methods of ``QC.main()`` are
    called with the prefix 'agv'.

    :param l_populations: list of population file prefixes.
    :return: None
    """

    ## suffixes of files without a header line
    l_suffixes_no_header = ['fam', 'sampleQC.samples']

    for suffix in [
        ## samples (concatenate)
        'imiss', 'het', 'sexcheck', 'genome',
        ## SNPs (paste/join) - disabled: 'frq', 'hwe', 'SNPQC.lmiss'
        'fam', 'sampleQC.samples',
        ## disabled: 'mds'
    ]:

        l_fn_in = [
            '%s.%s' % (population, suffix) for population in l_populations
        ]

        ## all or nothing: a partial concatenation would be misleading
        if not all(os.path.isfile(fn_in) for fn_in in l_fn_in):
            print('skip %s' % (suffix))
            continue
        print('concatenate %s' % (suffix))

        bool_header = suffix not in l_suffixes_no_header

        ## binary mode so that bytes are copied unchanged (as cat/sed did)
        with open('agv.%s' % (suffix), 'wb') as fd_out:
            ## write the header once, taken from the first population
            if bool_header:
                with open(l_fn_in[0], 'rb') as fd_in:
                    fd_out.write(fd_in.readline())
            for fn_in in l_fn_in:
                with open(fn_in, 'rb') as fd_in:
                    ## drop the header line of each individual file
                    if bool_header:
                        fd_in.readline()
                    fd_out.writelines(fd_in)

    instanceQC = QC.main()

    ## disabled: instanceQC.plink_plots('agv', i_wait=0)

    ## samples
    instanceQC.histogram_imiss('agv',)
    instanceQC.histogram_het('agv', bool_with_stddev=False,)
    instanceQC.histogram_genome('agv',)
    instanceQC.scatter_het_call('agv', bool_with_stddev=False,)
    if os.path.isfile('agv.mds'):
        instanceQC.scatter_mds('agv')

    ## SNPs - disabled: scatter_lmiss_frq, histogram_lmiss, histogram_frq,
    ## histogram_hwe

    return
def process_df(df, df_info):
    """Normalise a raw sensor table and pass it through ``QC.QC``.

    The function works on a copy, so the caller's ``df`` is not modified.

    Steps:
      1. Read the download position from ``df_info`` (row
         'Download Location (lat/long)' of the 'info' column, columns 'lat'
         and 'lon'); fall back to 0, 0 if it is missing or not numeric.
      2. Build a 'datetime' column from the 'Date' and 'Time' columns
         (format ``%d/%m/%Y %H:%M:%S``).
      3. Rename the temperature/depth columns, convert them to numbers and
         round them (temperature to 4 decimals, pressure to 3).
      4. Keep datetime/temperature/pressure, add latitude/longitude.
      5. Upper-case the column names, run ``QC.QC(df).df`` and lower-case the
         column names of the result again.

    :param df: DataFrame with 'Date', 'Time', 'Temperature C' and either
        'Depth Decibar' or 'Depth M' columns.
    :param df_info: DataFrame with an 'info' column plus 'lat' and 'lon'.
    :return: the DataFrame produced by ``QC.QC`` with lower-case column names.
    """
    df_info = df_info.set_index('info')
    try:
        lat = float(df_info.loc['Download Location (lat/long)', 'lat'])
        lon = float(df_info.loc['Download Location (lat/long)', 'lon'])
    except (KeyError, ValueError, TypeError):
        # KeyError: row/column absent; ValueError/TypeError: not a number.
        print('Location is not given')
        lat, lon = 0, 0

    # rename() without inplace returns a new frame, so the caller's frame
    # stays untouched by everything below.
    # NOTE(review): both depth columns map to 'pressure'; this presumably
    # assumes only one of them is present in a given file - confirm.
    df = df.rename(columns={
        'Temperature C': 'temperature',
        'Depth Decibar': 'pressure',
        'Depth M': 'pressure'
    })
    df['datetime'] = pd.to_datetime(df.Date + ' ' + df.Time,
                                    format='%d/%m/%Y %H:%M:%S')

    # Vectorised conversion and rounding instead of a row-wise apply.
    df['temperature'] = pd.to_numeric(df['temperature']).round(4)
    df['pressure'] = pd.to_numeric(df['pressure']).round(3)

    # Explicit copy so the column assignments below do not write to a slice.
    df = df[['datetime', 'temperature', 'pressure']].copy()
    df['latitude'] = lat
    df['longitude'] = lon

    # QC.QC is fed upper-case column names; the result is mapped back.
    qc_names = {
        'datetime': 'DATETIME',
        'temperature': 'TEMPERATURE',
        'pressure': 'PRESSURE',
        'latitude': 'LATITUDE',
        'longitude': 'LONGITUDE'
    }
    df = QC.QC(df.rename(columns=qc_names)).df
    df = df.rename(columns={upper: lower
                            for lower, upper in qc_names.items()})
    return df
#!/usr/bin/PYTHON
import integrated_denovo_pipeline as pipeline
import QC as qc
import os
import re

# Directory handed to both pipeline steps below.
DATA_DIRECTORY = '~/CWD_RADseq/'

# Step 1: run the FASTX toolkit quality filters on Read 2 only.
# There is a lot of data, so expect this step to be slow.
pipeline.iterative_FASTQ_quality_filter(
    directory=DATA_DIRECTORY,
    out_dir='/qual_filtered_R2_for_DBR_distr/',
    out_name='qual_filtered_30.fastq.gz',
    q=30,
    p=50,
    read='R2',
)

# Step 2: collect the degeneracy stats for the quality-filtered Read 2 files.
# NOTE(review): the call receives the top-level data directory, not the
# filter output directory - presumably degeneracy_r2 locates the filtered
# files itself; confirm.
qc.degeneracy_r2(
    directory=DATA_DIRECTORY,
    out_name='qual_filtered_30_degeneracy_check',
)
# Python 2 ships ttk as a top-level module, Python 3 as tkinter.ttk.
try:
    import ttk
    py3 = False
except ImportError:
    import tkinter.ttk as ttk
    py3 = True

# StringVar is needed by set_Tk_var(); import it explicitly so the function
# does not depend on a star-import being present elsewhere.
try:
    from Tkinter import StringVar
except ImportError:
    from tkinter import StringVar

# Toplevel window registered by init(); None while no window is open.
top_level = None


def set_Tk_var():
    """Create the Tk variable ``che53`` as a module-level ``StringVar``."""
    global che53
    che53 = StringVar()


def init(top, gui, *args, **kwargs):
    """Remember the GUI object and its toplevel window in module globals.

    :param top: toplevel window; stored as both ``top_level`` and ``root``.
    :param gui: GUI object; stored as ``w``.
    Extra positional and keyword arguments are accepted and ignored.
    """
    global w, top_level, root
    w = gui
    top_level = top
    root = top


def destroy_window():
    """Close the window registered by init(), if there is one.

    Calling this before init() or more than once is a no-op.
    """
    global top_level
    if top_level is None:
        return
    top_level.destroy()
    top_level = None


if __name__ == '__main__':
    import QC
    QC.vp_start_gui()
def main(input_file_or_folder, output_folder, sampling_rate, *, swan=True, muss=True, qc=True, parallel=False, debug=False, profiling=True):
    """Run SWaN, MUSS and QC on an actigraph csv file or an mhealth folder.

    Examples:

        Run all models
        >> pipenv run python main.py ABCRAW.csv ./outputs/ 80

        Run only QC script
        >> pipenv run python main.py ABCRAW.csv ./outputs/ 80 --swan=False --muss=False

        Don't run QC script
        >> pipenv run python main.py ABCRAW.csv ./outputs/ 80 --qc=False

    :param input_file_or_folder: path of the input Actigraph raw csv file if
        a file, or an mhealth folder if a folder. A file is first converted
        to mhealth format under `./.temp/<id>`, where `<id>` is the part of
        the file's base name before the first dot.
    :param output_folder: relative or absolute path of an output folder. For
        a file input, outputs go to the path returned by
        `create_output_folder(output_folder, <id>)`. For a folder input,
        outputs are written directly into `output_folder`, which is created
        if it does not exist.
    :param sampling_rate: sampling rate in Hz (converted with `float`).
    :param swan: Run SWaN model
    :param muss: Run MUSS model; features are written to
        `muss_intermediate/muss_feature.csv` and predictions to
        `muss_output.csv` in the output path.
    :param qc: Run Quality check script; the result is written to
        `qc_output.csv` in the output path.
    :param parallel: if option is presented, muss will use multicore
        processing
    :param debug: if option is presented, all intermediate files and
        converted mhealth data files will be preserved; otherwise they are
        deleted in the end. If an error occurs while running the algorithms,
        the clean-up step is never reached, so these files are preserved
        regardless of this option.
    :param profiling: Use profiling if available (forwarded to MUSS).
    """
    if os.path.isfile(input_file_or_folder):
        auto_id = os.path.basename(input_file_or_folder).split('.')[0]
        intermediate_folder = './.temp/'
        mhealth_folder = os.path.join(intermediate_folder, auto_id)
        mhealth.convert_to_mhealth(input_file_or_folder, mhealth_folder)
        output_path = create_output_folder(output_folder, auto_id)
    else:
        # Input is already an mhealth folder: nothing to convert.
        intermediate_folder = None
        mhealth_folder = input_file_or_folder
        output_path = output_folder
        # Nothing else creates this folder on the swan/qc-only paths, so
        # make sure it exists before results are written into it.
        os.makedirs(output_path, exist_ok=True)

    sampling_rate = float(sampling_rate)

    if muss:
        print('Running MUSS model...')
        muss_intermediate_folder = os.path.join(output_path,
                                                'muss_intermediate')
        os.makedirs(muss_intermediate_folder, exist_ok=True)
        muss_feature, muss_prediction = muss_model.main(
            mhealth_folder,
            sampling_rate=sampling_rate,
            parallel=parallel,
            profiling=profiling)
        muss_feature.to_csv(
            os.path.join(muss_intermediate_folder, 'muss_feature.csv'))
        muss_prediction.to_csv(os.path.join(output_path, 'muss_output.csv'),
                               index=False,
                               header=True)

    if swan:
        print('Running SWaN model...')
        SWaN.main(mhealth_folder, output_path, sampling_rate=sampling_rate)

    if qc:
        print('Running Quality check...')
        qc_result = QC.main(mhealth_folder, output_path)
        qc_result.to_csv(os.path.join(output_path, 'qc_output.csv'),
                         index=False,
                         header=True)

    if not debug:
        # Converted mhealth data only exists when the input was a file.
        if intermediate_folder is not None:
            remove_intermediate(intermediate_folder)
        remove_intermediate(os.path.join(output_path, 'intermediate'),
                            os.path.join(output_path, 'qc_intermediate'),
                            os.path.join(output_path, 'muss_intermediate'))