def MakeReference(self):
        """Create the FASTA-style reference file unless it already exists.

        Reads the barcode, target and reference-sequence files (after filtering
        each through ``Helper.RemoveNullAndBadKeyword``) and writes one record
        per row to ``self.strRefFile``::

            >BARCODE:TARGET
            REFERENCE

        All inputs are read and validated *before* the output file is opened.
        Opening it up front would leave an empty file behind whenever a
        validation step fails, and the ``os.path.isfile`` guard would then
        skip regeneration on every later run.

        Raises:
            AssertionError: if the three filtered inputs do not have the same
                number of rows.
        """
        if os.path.isfile(self.strRefFile):
            return

        with open(self.strBarcodeFile) as Barcode, \
                open(self.strTargetSeqFile) as Target, \
                open(self.strReferenceSeqFile) as Ref:

            listBarcode = Helper.RemoveNullAndBadKeyword(Barcode)
            listTarget = Helper.RemoveNullAndBadKeyword(Target)
            listRef = Helper.RemoveNullAndBadKeyword(Ref)

            ## defensive: raised explicitly so the check is not stripped by `python -O`
            if not (len(listBarcode) == len(listTarget) == len(listRef)):
                raise AssertionError('Barcode, Target and Reference must be a same row number.')

            listRecord = []
            for strBar, strTar, strRow in zip(listBarcode, listTarget, listRef):
                strBar = strBar.replace('\n', '').replace('\r', '').strip().upper()
                strTar = strTar.replace('\n', '').replace('\r', '').strip().upper()
                strRow = strRow.replace('\r', '').strip().upper()

                Helper.CheckIntegrity(self.strBarcodeFile, strBar)  ## defensive
                # NOTE(review): the target is checked against the barcode file
                # path as well; confirm whether self.strTargetSeqFile was meant.
                Helper.CheckIntegrity(self.strBarcodeFile, strTar)  ## defensive

                listRecord.append('>' + strBar + ':' + strTar + '\n' + strRow + '\n')

        # Only reached when every row passed validation.
        with open(self.strRefFile, 'w') as Output:
            Output.writelines(listRecord)
    def MakeReference(self):
        """Write the tab-separated reference table to ``self.strRefFile``.

        Rows of the barcode file and of the reference-sequence file are
        upper-cased and split on ``':'``; the first field is used as the
        sample key and the second as the sequence. For every reference row
        whose key also occurs in the barcode file, one line
        ``SAMPLE<TAB>BARCODE<TAB>REFERENCE`` is written. Reference rows
        without a matching barcode are logged as errors and skipped.

        Raises:
            AssertionError: if the filtered barcode and reference inputs do
                not have the same number of rows.
            IndexError: if a row contains no ``':'`` separator.
        """
        with open(self.strBarcodeFile) as Barcode, \
            open(self.strReferenceSeqFile) as Ref, \
            open(self.strRefFile, 'w') as Output:

            listBarcode = Helper.RemoveNullAndBadKeyword(Barcode)
            listRef = Helper.RemoveNullAndBadKeyword(Ref)

            ## defensive: raised explicitly so the check is not stripped by `python -O`
            if len(listBarcode) != len(listRef):
                raise AssertionError(
                    'Barcode and Reference must be a same row number.')

            # sample key -> barcode sequence
            dictBarcode = {}

            for strBarcodeRow in listBarcode:
                strBarcodeRow = strBarcodeRow.replace('\n', '').replace(
                    '\r', '').upper()
                Helper.CheckIntegrity(self.strBarcodeFile,
                                      strBarcodeRow)  ## defensive
                listBarcodeField = strBarcodeRow.split(':')
                dictBarcode[listBarcodeField[0]] = listBarcodeField[1]

            for strRefRow in listRef:
                strRefRow = strRefRow.replace('\n', '').replace('\r',
                                                                '').upper()
                # NOTE(review): the reference row is checked against the
                # barcode file path; confirm whether the reference file path
                # was meant.
                Helper.CheckIntegrity(self.strBarcodeFile,
                                      strRefRow)  ## defensive
                listRefField = strRefRow.split(':')
                strRefSample = listRefField[0]
                strRef = listRefField[1]

                try:
                    sBarcode = dictBarcode[strRefSample]
                except KeyError:
                    logging.error('no matching')
                    # The values must be passed as %-arguments of an explicit
                    # format string; passing them as (msg, arg) makes the
                    # logging module fail while formatting the record.
                    logging.error('%s %s', strRefSample, strRef)
                    continue

                Output.write('%s\t%s\t%s\n' %
                             (strRefSample, sBarcode, strRef))
def Main():
    """Command-line entry point of the BaseEdit frequency analyzer.

    Parses the command-line options, prepares the user/project folders via
    ``InitialFolder``, configures logging (log file plus stdout) and then,
    for every non-comment row of the project sample list, builds the
    reference, runs the generated commands through ``RunMulticore`` and
    merges the targets. After the loop the results are copied with
    ``CopyToAllResultFolder`` on the last runner that was created.
    """
    print('BaseEdit program start: %s' % datetime.now())

    sCmd = (
        "BaseEdit frequency analyzer\n\n./Run_BaseEdit_freq.py -t 15 -w 16-48 --indel_check_pos 39-40 --target_ref_alt A,T --PAM_seq NGG --PAM_pos 43-45 --Guide_pos 23-42"
        " --gap_open -10 --gap_extend 1\n\n"
        "The sequence position is the one base position (start:1)\n"
        "1: Barcode\n"
        "2: Base target window (end pos = PAM pos +3)\n"
        "3: Indel check pos\n"
        "4: PAM pos\n"
        "5: Guide pos (without PAM)\n\n"
        "TATCTCTATCAGCACACAAGCATGCAATCACCTTGGGTCCAAAGGTCC\n"
        "<------1------><----------------2--------------->\n"
        "                                     <3>  <4>   \n"
        "                      <---------5-------->      \n\n")

    parser = OptionParser(sCmd)

    # String defaults are converted by optparse according to `type`.
    parser.add_option("-t",
                      "--thread",
                      default="1",
                      type="int",
                      dest="multicore",
                      help="multiprocessing number")
    parser.add_option('--gap_open',
                      default='-10',
                      type='float',
                      dest='gap_open',
                      help='gap open: -100~0')
    parser.add_option('--gap_extend',
                      default='1',
                      type='float',
                      dest='gap_extend',
                      help='gap extend: 1~100')
    parser.add_option("-w",
                      "--target_window",
                      type="str",
                      dest="target_window",
                      help="a window size for target sequence : 20-48")
    parser.add_option(
        "--indel_check_pos",
        type="str",
        dest="indel_check_pos",
        help=
        "indel check position to filter : 39-40; insertion 39, deletion 39 & 40"
    )
    parser.add_option("--target_ref_alt",
                      type="str",
                      dest="target_ref_alt",
                      help="Ref 'A' is changed to Alt 'T': A,T")
    parser.add_option("--PAM_seq",
                      type="str",
                      dest="PAM_seq",
                      help="PAM sequence: NGG, NGC ...")
    parser.add_option(
        "--PAM_pos",
        type="str",
        dest="PAM_pos",
        help="PAM position range in the reference seqeunce : 43-45")
    parser.add_option(
        "--Guide_pos",
        type="str",
        dest="Guide_pos",
        help="Guide position range in the reference seqeunce : 23-42")
    parser.add_option('--python',
                      dest='python',
                      help='The python path including the CRISPResso2')
    parser.add_option('--user',
                      dest='user_name',
                      help='The user name with no space')
    parser.add_option('--project',
                      dest='project_name',
                      help='The project name with no space')

    options, args = parser.parse_args()

    InstInitFolder = InitialFolder(options.user_name, options.project_name,
                                   os.path.basename(__file__))
    InstInitFolder.MakeDefaultFolder()
    InstInitFolder.MakeInputFolder()
    InstInitFolder.MakeOutputFolder()

    # Append to the project log file and mirror every record to stdout.
    logging.basicConfig(
        format='%(process)d %(levelname)s %(asctime)s : %(message)s',
        level=logging.DEBUG,
        filename=InstInitFolder.strLogPath,
        filemode='a')
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    logging.info('Program start')
    if options.multicore > 15:
        logging.warning('Optimal treads <= 15')
    logging.info(str(options))

    with open(InstInitFolder.strProjectFile) as Sample_list:

        listSamples = Helper.RemoveNullAndBadKeyword(Sample_list)

        strInputProject = './Input/{user}/Query/{project}'.format(
            user=options.user_name, project=options.project_name)

        @CheckProcessedFiles
        def RunPipeline(**kwargs):

            # Stays None when every row is skipped, so the final copy step
            # can be guarded instead of failing with UnboundLocalError.
            InstBaseEdit = None

            for strSample in listSamples:
                # startswith() also tolerates an empty row, unlike [0].
                if strSample.startswith('#'): continue

                tupSampleInfo = Helper.SplitSampleInfo(strSample)
                if not tupSampleInfo: continue
                strSample, strRef, strExpCtrl = tupSampleInfo

                InstBaseEdit = clsBaseEditRunner(strSample, strRef, options,
                                                 InstInitFolder)
                InstBaseEdit.MakeReference()

                listCmd = InstBaseEdit.MakeIndelSearcherCmd()
                RunMulticore(listCmd, options.multicore)  ## from CoreSystem.py

                InstBaseEdit.MakeMergeTarget()

            if InstBaseEdit is None:
                logging.warning(
                    'No sample was processed; nothing to copy to the result folder.'
                )
                return

            InstBaseEdit.CopyToAllResultFolder()

        RunPipeline(InstInitFolder=InstInitFolder,
                    strInputProject=strInputProject,
                    listSamples=listSamples,
                    logging=logging)

    print('BaseEdit program end: %s' % datetime.now())
def Main():
    """Command-line entry point of the indel searcher for CRISPR Cas9 / Cpf1.

    Parses the options, prepares the user/project folders, configures
    logging and runs the per-sample pipeline (split, reference, indel search,
    output, frequency calculation) for every row of the project sample list.
    When the sample list contains both the EXP and the CTRL group, the indel
    normalization is run afterwards.
    """
    parser = OptionParser('Indel search program for CRISPR CAS9 & CPF1\n<All default option> python2.7 Run_indel_searcher.py --pam_type Cas9 --pam_pos Forward')

    # (flags, keyword arguments) in the order they are registered.
    tupOptionSpecs = (
        (('-t', '--thread'),
         dict(default='1', type='int', dest='multicore',
              help='multiprocessing number, recommendation:t<16')),
        (('-c', '--chunk_number'),
         dict(default='400000', type='int', dest='chunk_number',
              help='split FASTQ, must be multiples of 4. file size < 1G recommendation:40000, size > 1G recommendation:400000')),
        (('-q', '--base_quality'),
         dict(default='20', dest='base_quality',
              help='NGS read base quality')),
        (('--gap_open',),
         dict(default='-10', type='float', dest='gap_open',
              help='gap open: -100~0')),
        (('--gap_extend',),
         dict(default='1', type='float', dest='gap_extend',
              help='gap extend: 1~100')),
        (('-i', '--insertion_window'),
         dict(default='4', type='int', dest='insertion_window',
              help='a window size for insertions')),
        (('-d', '--deletion_window'),
         dict(default='4', type='int', dest='deletion_window',
              help='a window size for deletions')),
        (('--pam_type',),
         dict(dest='pam_type', help='PAM type: Cas9 Cpf1')),
        (('--pam_pos',),
         dict(dest='pam_pos', help='PAM position: Forward Reverse')),
        (('--python',),
         dict(dest='python',
              help='The python path including the CRISPResso2')),
        (('--user',),
         dict(dest='user_name', help='The user name with no space')),
        (('--project',),
         dict(dest='project_name', help='The project name with no space')),
        (('--pickle',),
         dict(dest='pickle', default='False',
              help='Dont remove the pickles in the tmp folder : True, False')),
        (('--split',),
         dict(dest='split', default='False',
              help='Dont remove the split files in the input folder : True, False')),
        (('--classfied_FASTQ',),
         dict(dest='class_fastq', default='True',
              help='Dont remove the ClassfiedFASTQ in the tmp folder : True, False')),
        (('--ednafull',),
         dict(dest='ednafull', help='The nucleotide alignment matrix')),
    )
    for tupFlags, dictSpec in tupOptionSpecs:
        parser.add_option(*tupFlags, **dictSpec)

    options, args = parser.parse_args()

    strScriptName = os.path.basename(__file__)
    InstInitFolder = InitialFolder(options.user_name, options.project_name, strScriptName)
    InstInitFolder.MakeDefaultFolder()
    InstInitFolder.MakeInputFolder()
    InstInitFolder.MakeOutputFolder()

    # Append to the project log file and mirror every record to stdout.
    logging.basicConfig(
        format='%(process)d %(levelname)s %(asctime)s : %(message)s',
        level=logging.DEBUG,
        filename=InstInitFolder.strLogPath,
        filemode='a',
    )
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    logging.info('Program start')
    if options.multicore > 15:
        logging.warning('Optimal treads <= 15')
    logging.info(str(options))

    # InstInitFolder.strProjectFile is, for example,
    #   ./User/Nahye/2019_Nahye_Cas9D7_samples.txt
    # with one sample per row (sample name, reference name, group):
    #   190819_Nahye_12K_D4_D0_1-Cas9D7        Cas9D7  Ctrl
    #   190819_Nahye_12K_D4_eCas9_Rep1-Cas9D7  Cas9D7  Exp
    #   190819_Nahye_12K_D4_eCas9_Rep2-Cas9D7  Cas9D7  Exp
    #   190819_Nahye_12K_D4_evo_Rep1-Cas9D7    Cas9D7  Exp
    with open(InstInitFolder.strProjectFile) as Sample_list:

        listSamples = Helper.RemoveNullAndBadKeyword(Sample_list)
        intProjectNumInTxt = len(listSamples)

        strInputProject = './Input/{user}/FASTQ/{project}'.format(
            user=options.user_name,
            project=options.project_name,
        )

        @CheckProcessedFiles
        def RunPipeline(**kwargs):

            # Group labels (third column) seen across all processed rows.
            setGroup = set()

            for strSampleRow in listSamples:

                # e.g. ('190819_Nahye_12K_D4_D0_1-Cas9D7', 'Cas9D7', 'Ctrl');
                # rows that yield a falsy result are skipped.
                tupSampleInfo = Helper.SplitSampleInfo(strSampleRow)
                if not tupSampleInfo:
                    continue

                # sample name, reference name, group (Ctrl / Exp / "")
                strSample, strRef, strExpCtrl = tupSampleInfo
                setGroup.add(strExpCtrl)

                # `options` carries the PAM type, PAM position, ...
                InstRunner = clsIndelSearcherRunner(strSample, strRef, options, InstInitFolder)

                logging.info('SplitFile')
                InstRunner.SplitFile()

                logging.info('MakeReference')
                InstRunner.MakeReference()

                # Commands for Indel_searcher_crispresso_hash.py
                logging.info('MakeIndelSearcherCmd')
                listCmd = InstRunner.MakeIndelSearcherCmd()

                logging.info('RunMulticore')
                RunMulticore(listCmd, options.multicore)  ## from CoreSystem.py

                logging.info('MakeOutput')
                InstRunner.MakeOutput()

                logging.info('RunIndelFreqCalculator')
                InstRunner.RunIndelFreqCalculator()

            if setGroup == {'EXP', 'CTRL'}:
                # Both groups present: normalize using the last runner.
                InstRunner.IndelNormalization()
            elif setGroup in (set(), set(['']), set([' '])):
                # No group information at all: nothing to normalize.
                pass
            else:
                logging.error('The group category is not appropriate. : %s' % setGroup)
                logging.error('Please make sure your project file is correct.')
                logging.error('The group category must be Exp or Ctrl')
                raise Exception

        RunPipeline(
            InstInitFolder=InstInitFolder,
            strInputProject=strInputProject,
            intProjectNumInTxt=intProjectNumInTxt,
            listSamples=listSamples,
            logging=logging,
        )

    logging.info('Program end')