Ejemplo n.º 1
0
def _concatenate_population_files(l_populations, suffix, bool_header):
    """Merge every ``<population>.<suffix>`` file into ``agv.<suffix>``.

    Files are appended in the order given by ``l_populations``. When
    ``bool_header`` is true, the first line of the first population's file
    is written once and the first line of every file is dropped while
    appending; otherwise the files are appended unchanged. An existing
    ``agv.<suffix>`` is overwritten.
    """
    # Function-scope import: the module's import block is not visible from
    # this part of the file.
    import shutil

    # Binary mode copies the bytes untouched (no newline/encoding translation).
    with open('agv.%s' % (suffix,), 'wb') as fd_out:
        if bool_header:
            with open('%s.%s' % (l_populations[0], suffix), 'rb') as fd_in:
                fd_out.write(fd_in.readline())
        for population in l_populations:
            with open('%s.%s' % (population, suffix), 'rb') as fd_in:
                if bool_header:
                    fd_in.readline()  # drop this file's own header line
                shutil.copyfileobj(fd_in, fd_out)


def plots(l_populations):
    """Merge per-population tables into ``agv.*`` files and plot them.

    For each handled suffix, the files ``<population>.<suffix>`` in the
    current working directory are concatenated into ``agv.<suffix>``. A
    suffix is skipped (and reported) as soon as one population lacks its
    file. Afterwards the plotting methods of a ``QC.main()`` instance are
    invoked on the ``agv`` prefix.

    :param l_populations: file prefixes, one per population, in the order
        they should appear in the merged files.
    """
    # Suffixes whose files carry no header line and are appended as-is.
    l_suffixes_no_header = ['fam', 'sampleQC.samples']

    for suffix in [
        ## samples (concatenate)
        'imiss', 'het', 'sexcheck', 'genome',
        ## SNPs (paste/join) -- currently disabled
        ##        'frq','hwe','SNPQC.lmiss',
        'fam', 'sampleQC.samples',
        ##        'mds',
    ]:
        # any() stops at the first missing file.
        if any(
            not os.path.isfile('%s.%s' % (population, suffix))
            for population in l_populations
        ):
            # Single-argument print gives the same output on Python 2 and 3.
            print('skip %s' % (suffix,))
            continue
        print('concatenate %s' % (suffix,))

        _concatenate_population_files(
            l_populations, suffix, suffix not in l_suffixes_no_header)

    instanceQC = QC.main()
    ##    instanceQC.plink_plots('agv',i_wait=0)

    ## samples
    instanceQC.histogram_imiss('agv')
    instanceQC.histogram_het('agv', bool_with_stddev=False)
    instanceQC.histogram_genome('agv')
    instanceQC.scatter_het_call('agv', bool_with_stddev=False)
    # agv.mds is not produced by the loop above; plot it only if present.
    if os.path.isfile('agv.mds'):
        instanceQC.scatter_mds('agv')

    ##    ## SNPs
    ##    instanceQC.scatter_lmiss_frq('agv')
    ##    instanceQC.histogram_lmiss('agv')
    ##    instanceQC.histogram_frq('agv')
    ##    instanceQC.histogram_hwe('agv')
Ejemplo n.º 2
0
def process_df(df, df_info):
    """Normalise a raw export and pass it through the QC step.

    The function builds a ``datetime`` column from ``Date`` and ``Time``
    (format ``%d/%m/%Y %H:%M:%S``), renames the temperature/depth columns,
    converts them to numbers rounded to 4 and 3 decimals respectively,
    attaches the download location, and hands the frame (with upper-case
    column names) to ``QC.QC``. The frame exposed by ``QC.QC(...).df`` is
    returned with the five core columns renamed back to lower case.

    Note: the ``datetime`` column, the column renames and the numeric
    conversion are applied to the caller's ``df`` in place.

    :param df: frame with ``Date``, ``Time``, ``Temperature C`` and either
        ``Depth Decibar`` or ``Depth M`` columns.
    :param df_info: frame with an ``info`` column; the row labelled
        ``Download Location (lat/long)`` supplies ``lat`` and ``lon``.
    :return: the frame produced by the QC step.
    """
    df_info = df_info.set_index('info')
    location_row = 'Download Location (lat/long)'
    try:
        lat = float(df_info.loc[location_row, 'lat'])
        lon = float(df_info.loc[location_row, 'lon'])
    except (KeyError, ValueError, TypeError):
        # Missing row/column or an unparsable value: fall back to (0, 0).
        print('Location is not given')
        lat, lon = 0, 0

    df['datetime'] = pd.to_datetime(df.Date + ' ' + df.Time,
                                    format='%d/%m/%Y %H:%M:%S')
    # In place on purpose: callers may rely on the renamed columns.
    df.rename(columns={
        'Temperature C': 'temperature',
        'Depth Decibar': 'pressure',
        'Depth M': 'pressure'
    },
              inplace=True)
    # Vectorised rounding instead of a Python-level call per row.
    df['temperature'] = pd.to_numeric(df['temperature']).round(4)
    df['pressure'] = pd.to_numeric(df['pressure']).round(3)

    # Explicit copy so the new columns are not written into a slice of the
    # caller's frame.
    df = df[['datetime', 'temperature', 'pressure']].copy()
    df['latitude'] = lat
    df['longitude'] = lon

    # The QC step is fed upper-case column names; map there and back.
    to_qc_names = {
        'datetime': 'DATETIME',
        'temperature': 'TEMPERATURE',
        'pressure': 'PRESSURE',
        'latitude': 'LATITUDE',
        'longitude': 'LONGITUDE'
    }
    from_qc_names = {upper: lower for lower, upper in to_qc_names.items()}

    df = QC.QC(df.rename(columns=to_qc_names)).df
    return df.rename(columns=from_qc_names)
Ejemplo n.º 3
0
#!/usr/bin/PYTHON

import integrated_denovo_pipeline as pipeline
import QC as qc
import os
import re

# Step 1: quality-filter Read 2 only with the FASTX toolkit
#         (lots of data; will be slow).
# NOTE(review): '~' is passed through literally here; confirm the callee
# expands it to the home directory.
pipeline.iterative_FASTQ_quality_filter(
    directory='~/CWD_RADseq/',
    out_dir='/qual_filtered_R2_for_DBR_distr/',
    out_name='qual_filtered_30.fastq.gz',
    q=30,
    p=50,
    read='R2',
)

# Step 2: get the degeneracy stats for the quality-filtered Read 2 files.
qc.degeneracy_r2(
    directory='~/CWD_RADseq/',
    out_name='qual_filtered_30_degeneracy_check',
)
Ejemplo n.º 4
0
try:
    import ttk
    py3 = False
except ImportError:
    import tkinter.ttk as ttk
    py3 = True


def set_Tk_var():
    """Bind the module-level name ``che53`` to a new ``StringVar``.

    NOTE(review): ``StringVar`` is not imported in the visible part of this
    file; presumably a Tkinter import elsewhere provides it -- confirm.
    """
    global che53
    che53 = StringVar()


def init(top, gui, *args, **kwargs):
    """Store the GUI handles in module-level globals.

    ``top`` is kept under both ``top_level`` and ``root``; ``gui`` is kept
    under ``w``. Extra positional and keyword arguments are accepted and
    ignored.
    """
    global w, top_level, root
    top_level = root = top
    w = gui


def destroy_window():
    """Close the window held in ``top_level`` and clear that global."""
    global top_level
    window = top_level
    window.destroy()
    # Cleared only after a successful destroy().
    top_level = None


if __name__ == '__main__':
    # When executed as a script, hand over to the QC module's GUI entry
    # point. QC is imported here rather than at module level (presumably to
    # avoid a circular import between the two modules -- TODO confirm).
    import QC
    QC.vp_start_gui()
Ejemplo n.º 5
0
def main(input_file_or_folder,
         output_folder,
         sampling_rate,
         *,
         swan=True,
         muss=True,
         qc=True,
         parallel=False,
         debug=False,
         profiling=True):
    """Run SWaN, MUSS and QC on an Actigraph csv file or an mhealth folder.

    Examples:

        Run all models

            >> pipenv run python main.py ABCRAW.csv ./outputs/ 80

        Run only QC script

            >> pipenv run python main.py ABCRAW.csv ./outputs/ 80 --swan=False --muss=False

        Don't run QC script

            >> pipenv run python main.py ABCRAW.csv ./outputs/ 80 --qc=False

    :param input_file_or_folder: path of the input Actigraph raw csv file if a file, or an mhealth folder if a folder.
    :param output_folder: relative or absolute path of an output folder. For a csv input, all algorithm outputs are placed in a subfolder of this folder named after the input file (the part of its name before the first dot), so the same output folder can be shared by multiple input files. For a folder input, outputs are written to this folder directly.
    :param sampling_rate: sampling rate in Hz; anything ``float()`` accepts.
    :param swan: Run SWaN model.
    :param muss: Run MUSS model.
    :param qc: Run Quality check script.
    :param parallel: if set, muss will use multicore processing.
    :param debug: if set, all intermediate files and converted mhealth data files are preserved; otherwise they are deleted at the end. If an error occurs while running the algorithms, the cleanup step is never reached, so these files are preserved regardless of this option. Converted mhealth data files are stored in the `.temp` folder under the current working directory; algorithm intermediate files are stored in the output folder corresponding to each input actigraph csv.
    :param profiling: Use profiling if available.
    :raises ValueError: if ``sampling_rate`` is not a valid number (raised before any file is converted or written).
    """

    # Validate the rate first so a bad value fails before the mhealth
    # conversion runs and leaves files behind.
    sampling_rate = float(sampling_rate)

    if os.path.isfile(input_file_or_folder):
        # Single csv: convert it into a temporary mhealth folder first.
        auto_id = os.path.basename(input_file_or_folder).split('.')[0]
        intermediate_folder = './.temp/'
        mhealth_folder = os.path.join(intermediate_folder, auto_id)
        mhealth.convert_to_mhealth(input_file_or_folder, mhealth_folder)
        output_path = create_output_folder(output_folder, auto_id)
    else:
        # Already an mhealth folder: nothing to convert, nothing to clean up.
        intermediate_folder = None
        mhealth_folder = input_file_or_folder
        output_path = output_folder

    if muss:
        print('Running MUSS model...')
        muss_intermediate_folder = os.path.join(output_path,
                                                'muss_intermediate')
        os.makedirs(muss_intermediate_folder, exist_ok=True)
        muss_feature, muss_prediction = muss_model.main(
            mhealth_folder,
            sampling_rate=sampling_rate,
            parallel=parallel,
            profiling=profiling)
        muss_feature.to_csv(
            os.path.join(muss_intermediate_folder, 'muss_feature.csv'))
        muss_prediction.to_csv(os.path.join(output_path, 'muss_output.csv'),
                               index=False,
                               header=True)
    if swan:
        print('Running SWaN model...')
        SWaN.main(mhealth_folder, output_path, sampling_rate=sampling_rate)
    if qc:
        print('Running Quality check...')
        qc_result = QC.main(mhealth_folder, output_path)
        qc_result.to_csv(os.path.join(output_path, 'qc_output.csv'),
                         index=False,
                         header=True)

    if not debug:
        if intermediate_folder is not None:
            remove_intermediate(intermediate_folder)
        remove_intermediate(os.path.join(output_path, 'intermediate'),
                            os.path.join(output_path, 'qc_intermediate'),
                            os.path.join(output_path, 'muss_intermediate'))