Example #1
def main():
    #==============================USER INPUTS======================================

    #-------------------------------------------------------------------------------
    # EDIT THE VALUES IN THIS BLOCK TO SUIT YOUR RUN
    # SAX INPUTS
    # REPLACE ONLY THE NUMBERS; DON'T TOUCH THE COMMENT AFTER EACH VALUE (ESPECIALLY THE #)
    word = 4  # word length for each channel
    letter = 5  # alphabet size; must be greater than 3 and less than 20

    # FILE INPUTS
    # ENTER THE EXPERIMENT'S STARTING NUMBER AND THE FIRST AND LAST FILE NUMBERS
    starter = '2'  # experiment prefix; file names become starter + a zero-padded number
    pstart = 2  # first file number to process
    num_files = 19  # last file number to process

    # Z-SCORE (NORMALIZE) THE DATA
    # SET TO True IF YOU WANT Z-SCORING, False IF YOU DON'T
    zscore = False

    # REGION OF INTEREST (ROI) ANALYSIS
    # SET TO True TO AVERAGE CHANNELS BY REGION, False TO KEEP INDIVIDUAL CHANNELS
    roi = False

    # LIST THE CONDITION NUMBERS AS A SINGLE STRING WITH NO SPACES IN BETWEEN
    conditions = "12"  # which conditions to analyze in each file, e.g. conditions 1 and 2
    # OUTPUT FILE NAME SUFFIX
    # Change this each run to reflect where the results should be written;
    # MAKE SURE IT ALWAYS ENDS IN '.csv'
    ext = '1v2_Rec.csv'

    # ORANGE INPUTS
    # K-NEAREST NEIGHBOR
    num_k = 10  # how many neighbors
    n_num = 100  # feature selection importance
    #-------------------------------------------------------------------------------

    # Running each file individually
    for i in range(pstart, num_files + 1):
        heads = run(starter + '{0:0>3}'.format(str(i)), word, letter, num_k,
                    n_num, conditions, zscore, roi, ext)
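        # e.g. with starter == '2' and i == 2, '{0:0>3}'.format(str(i)) gives '002',
        # so run() is called with the file prefix '2002'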

    # Building the single concatenated file from all of the per-file outputs
    concatenate(num_files, starter, 'Sax_Output.tab', 3, 0, pstart)
    concatenate(num_files, starter, 'Arff.arff', heads, 0, pstart)
    # Writing ML Data for concatenated files
    name = 'All_' + starter + '000_Data'
    write_csv((name + '_ML_Data' + ext), orange(name, num_k, n_num))
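    # with the defaults above this writes 'All_2000_Data_ML_Data1v2_Rec.csv'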

    # Leave-one-file-out runs: concatenate everything except file i and write its ML data
    for i in range(pstart, num_files + 1):
        concatenate(num_files, starter, 'Sax_Output.tab', 3, i, pstart)
        concatenate(num_files, starter, 'Arff.arff', heads, i, pstart)
        write_csv(
            name + '_ML_Data_No' + starter + '{0:0>3}'.format(str(i)) + ext,
            orange_two(starter, starter + '{0:0>3}'.format(str(i)), num_k,
                       n_num))
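
The helper functions called above (run, concatenate, write_csv, open_csv, orange, orange_two) are defined elsewhere in the project and are not shown in these examples. As a rough guide to the I/O pair, a minimal Python 3 stand-in for open_csv and write_csv built on the standard csv module might look like the sketch below; the real implementations may differ.

import csv

def open_csv(filename):
    # illustrative stand-in: read a CSV file into a list of rows
    # (each row is a list of field strings as produced by csv.reader)
    with open(filename, 'r', newline='') as f:
        return [row for row in csv.reader(f)]

def write_csv(filename, rows):
    # illustrative stand-in: write a list of rows out as a CSV file
    with open(filename, 'w', newline='') as f:
        csv.writer(f).writerows(rows)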
Example #3
def run(head, word, letter, num_k, n_num, conditions, zscore, roi, ext):

    data_name = head + '_All_Data'
    conds_name = 'conditions_' + head

    #==========================INPUTTING DATA FILES=================================

    # open files
    data = open_csv(data_name + '.csv')
    conds = open_csv(conds_name + '.csv')
    num_chans = len(data[0]) // 2
    # separates total data into separate oxy and deoxy files, to be used
    # in the arff file generator and then Orange
    data = data_format(data, num_chans)
    conds = cond_format(conds)
    num_conds = (len(conds) - 1) // 2
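    # Inferred from the indexing below (the real layout comes from data_format()
    # and cond_format(), which are defined elsewhere): `data` appears to have two
    # header rows followed by samples, with the oxy channels in the first
    # num_chans columns and the deoxy channels in the next num_chans, while
    # `conds` appears to hold an onset row and a duration row for each condition,
    # each with two leading label cells.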

    #===========================Z-SCORING THE DATA==================================

    # NOTE: This section is optional, might not be used every time
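    # `stats` is presumably scipy.stats; stats.zscore standardizes each channel
    # column to zero mean and unit variance, i.e. z = (x - mean(x)) / std(x)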
    if zscore:
        for j in range(num_chans * 2):
            zary = []
            for i in range(len(data) - 2):
                zary.append(data[i + 2][j])
            zary = stats.zscore(zary)
            for i in range(len(data) - 2):
                data[i + 2][j] = zary[i]

    #=======================REGION OF INTEREST ANALYSIS=============================

    # NOTE: This section is optional, might not be used every time
    if roi:
        # Reading the ROI lines from a file
        ROI_lines = []
        #with open('ROI_'+head+'.txt','rb') as f:
        with open('ROI_file_2000s.txt', 'r') as f:
            # each line appears to list the 1-based channel numbers of one
            # region, separated by commas
            for line in f:
                row = [int(num) for num in line.strip().split(',')]
                ROI_lines.append(row)
            num_chans = len(ROI_lines)

        # Producing a new ROIized data table
        firstl = (['Oxy'] + [''] * (num_chans - 1)
                  + ['Deoxy'] + [''] * (num_chans - 1))
        secondl = []
        for i in range(num_chans):
            secondl += [('CH' + str(i + 1))]
        new_data = []
        for i in range(len(data) - 2):
            oxy = []
            deoxy = []
            for line in ROI_lines:
                aveo = 0
                aved = 0
                for j in range(len(line)):
                    aveo += data[i + 2][line[j] - 1]
                    aved += data[i + 2][line[j] + num_chans - 1]
                oxy.append(float(aveo) / len(line))
                deoxy.append(float(aved) / len(line))
            new_data.append((oxy + deoxy))
        data = [firstl, secondl * 2]
        data += new_data

    #========================CONVERTING TO MARKS FILE===============================

    # Separate the oxy and deoxy data for the arff file generator
    write_sep_data(data, num_chans, data_name)
    marks = [['start', 'end', 'condition']]
    for i in range(num_conds + 1):
        if i == 0 or conditions.find(str(i)) == -1:
            continue
        for j in range(len(conds[i * 2]) - 2):
            row = [
                str(conds[i * 2 - 1][j + 2]),
                str(conds[i * 2 - 1][j + 2] + conds[i * 2][j + 2]),
                str(i)
            ]
            marks.append(row)

    # writes the conditions out as a [start, end, condition] file to be
    # used in the arff file generator
    write_csv(data_name + '_Marks.csv', marks)
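    # e.g. a marks row of ['10', '25', '1'] would mean condition 1 starting at
    # sample 10 and ending at sample 25 (start + duration)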

    #========================SAX/SUMMER '12 REPRESENTATIONS=========================

    # Creating the Sax Representation (Writing it to a file)
    sax_lines = []
    sax = ""
    for i in range(num_conds + 1):
        # keeping only the conditions requested in the `conditions` string
        if i == 0 or conditions.find(str(i)) == -1:
            continue
        for j in range(len(conds[i * 2]) - 2):
            start = conds[i * 2 - 1][j + 2]  # block onset (row index into data)
            length = conds[i * 2][j + 2]  # block duration in samples
            for k in range(num_chans * 2):  # getting every channel
                ary = []
                for m in range(length):
                    ary.append(data[start + m][k])
                sax += sax_rep(word, letter, ary)[0]

            newsax = ""
            for n in range(len(sax)):
                newsax += (str((ord(str(sax[n])) - 96)) + "\t")
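            # ord('a') - 96 == 1, so the SAX letters a, b, c, ... become the
            # integers 1, 2, 3, ... in the tab-separated output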

            # adding the header lines to the output file
            if sax_lines == []:
                first_line = ""
                second_line = ""
                for f in range(num_chans * 2):
                    for g in range(word):
                        first_line += ("Ch" + str((f % num_chans) + 1))
                        if f < num_chans:
                            first_line += "Oxy"
                        else:
                            first_line += "Deoxy"
                        first_line += ("Pos" + str(g + 1) + "\t")
                        second_line += "d\t"
                first_line += "condition\n"
                second_line += "d\n"
                sax_lines.append(first_line)
                sax_lines.append(second_line)
                sax_lines.append((num_chans * word * 2) * "\t" + "class\n")
            newsax += (str(i) + "\n")
            sax_lines.append(newsax)
            sax = ""

    # Writing the SAX lines to a file
    with open((data_name + '_Sax_Output.tab'), 'w') as f:
        f.writelines(sax_lines)

    # generate the arff file; 1 is passed for the subjects argument until further notice
    arff_generate(data_name, num_conds, 1, num_chans, conditions)

    #=============================PASSING IN ORANGE=================================

    write_csv((head + '_ML_Data' + ext), orange(data_name, num_k, n_num))
    # the return value is passed back to main() as `heads`, presumably the
    # number of header lines in the generated arff file
    return (66 * num_chans + 3)
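
sax_rep() is another helper defined outside these examples; the surrounding code treats the first element of its return value as a word-length string of SAX letters. A sketch of the classic SAX construction it presumably implements (z-normalize the window, reduce it to `word` segment means by piecewise aggregate approximation, then map each mean to one of `letter` symbols using Gaussian breakpoints) is given below; the details and the returned tuple are assumptions.

import numpy as np
from scipy.stats import norm

def sax_rep(word, letter, series):
    # illustrative stand-in, not the project's actual implementation;
    # assumes len(series) >= word
    x = np.asarray(series, dtype=float)
    # z-normalize this window (guard against flat segments)
    std = x.std()
    x = (x - x.mean()) / std if std > 0 else x - x.mean()
    # piecewise aggregate approximation: mean of `word` roughly equal segments
    edges = np.linspace(0, len(x), word + 1).astype(int)
    paa = [x[edges[i]:edges[i + 1]].mean() for i in range(word)]
    # breakpoints that split a standard normal into `letter` equiprobable bins
    breakpoints = norm.ppf(np.arange(1, letter) / float(letter))
    # lowest bin -> 'a', next -> 'b', and so on
    symbols = ''.join(chr(97 + int(np.searchsorted(breakpoints, m))) for m in paa)
    return symbols, paa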