예제 #1
0
def import_data():
    """Load the utterance data set, using a pickle file as a cache.

    When "save_obj.txt" exists, the previously pickled list is loaded from
    it; otherwise the list is rebuilt from the database (one data_instance
    per utterance, class 1 = stressed, class 0 = not stressed) and pickled
    so the next run can skip the DB queries.

    Returns:
        list of data_instance objects.
    """
    cache_path = "save_obj.txt"

    def _utterances_for(speaker_id, stress_label):
        ## Build one data_instance per utterance of `speaker_id` that has
        ## features stored in the DB; `stress_label` (1/0) becomes the class.
        instances = []
        ## getStressedUts returns (event, starttime) for each utterance
        info = myDB.getStressedUts(speaker_id, stress_label,
                                   "Stress_based_on_hf_10sec_window")
        for row in info:
            ## receives (event, id, starttime) and returns the 6125 feature
            ## values for that utterance
            ut = myDB.getUtterancesFeatures(row[0], speaker_id, int(row[1]))
            if ut == []:  ## no utterance features: proceed to the next one
                continue
            ## duration is 0 because it is not read from the DB
            instances.append(
                data_instance(speaker_id, row[0], int(row[1]), 0, ut,
                              stress_label))
        return instances

    if os.path.isfile(cache_path):  ## cache exists: load instead of querying
        print("importing data from file...")
        with open(cache_path, "rb") as cache_file:
            total_set = pickle.load(cache_file)
    else:  ## no cache yet: rebuild from the DB and write the cache
        print("creating load file data... (need to test this approach...)")
        total_set = []
        for speaker_id in myDB.getIds():
            if speaker_id not in ids_new_tag:  ## only speakers with the new tag
                continue
            print(speaker_id)
            total_set.extend(_utterances_for(speaker_id, 1))  ## stressed
            total_set.extend(_utterances_for(speaker_id, 0))  ## not stressed
        with open(cache_path, "wb") as cache_file:
            pickle.dump(total_set, cache_file)
    return total_set
예제 #2
0
def import_data_csv(data_file, info_file):
    """Read instances from a ", "-separated CSV data file.

    Each line of `data_file` holds feature values with the class label in
    the last column.  When `info_file` is non-empty it supplies, per
    instance, the columns id, event, start time and duration; its first
    line is a header, so data row i matches info row i + 1.

    Returns:
        list of data_instance objects.

    Fixes: the original left both file handles open (resource leak).
    """
    total_set = []
    with open(data_file, "r") as f_data:  ## read the feature/class lines
        data = f_data.readlines()
    info = []
    if info_file != "":  ## open the info file only when one was given
        with open(info_file, "r") as f_info:
            info = f_info.readlines()
    for i in range(0, len(data)):
        line = data[i].split(", ")
        ## every column but the last holds a feature value
        ft_values = [float(v) for v in line[:-1]]
        line_class = int(line[-1])  ## the last column is the class
        ## defaults used when no info file is available
        inst_id = 0
        event = ""
        startTime = 0
        duration = 0
        if info_file != "":  ## info file present: fill in the real metadata
            ## the info file starts with a header row, hence the +1
            line_info = info[i + 1].split(", ")
            inst_id = int(line_info[0])
            event = line_info[1]
            startTime = int(line_info[2])
            duration = int(line_info[3])
        ## now that we have all the info, store the instance
        total_set.append(
            data_instance(inst_id, event, startTime, duration, ft_values,
                          line_class))
    return total_set
예제 #3
0
def import_data_csv(data_file, info_file):
    """Read instances from a CSV file whose separator is ", " or ",".

    Each line of `data_file` holds feature values with the class label in
    the last column; a line with fewer than 10 columns is re-split on a
    bare ",".  When `info_file` is non-empty it supplies, per instance,
    the columns id, event, start time and duration; its first line is a
    header, so data row i matches info row i + 1.

    Returns:
        list of data_instance objects.

    Fixes: the original leaked both file handles, and the "\\r"-stripped
    class string was computed and then discarded (the raw column was
    converted instead).
    """
    total_set = []
    with open(data_file, "r") as f_data:  ## read the feature/class lines
        data = f_data.readlines()
    info = []
    if info_file != "":  ## open the info file only when one was given
        with open(info_file, "r") as f_info:
            info = f_info.readlines()
    for i in range(0, len(data)):
        line = data[i].split(", ")
        if len(line) < 10:  ## too few columns: the separator must be ","
            line = data[i].split(",")
        ## every column but the last holds a feature value
        ft_values = [float(v) for v in line[:-1]]
        line_class = line[-1]
        if "\r" in line_class:  ## drop a trailing carriage return (CRLF files)
            line_class = line_class.split("\r")[0]
        ## convert the CLEANED value (the original re-read the raw column)
        line_class = int(float(line_class))
        ## defaults used when no info file is available
        inst_id = 0
        event = ""
        startTime = 0
        duration = 0
        if info_file != "":  ## info file present: fill in the real metadata
            ## the info file starts with a header row, hence the +1
            line_info = info[i + 1].split(", ")
            if len(line_info) < 10:  ## same separator fallback as the data
                line_info = info[i + 1].split(",")
            inst_id = int(line_info[0])
            event = line_info[1]
            startTime = int(line_info[2])
            duration = int(line_info[3])
        ## now that we have all the info, store the instance
        total_set.append(
            data_instance(inst_id, event, startTime, duration, ft_values,
                          line_class))
    return total_set
예제 #4
0
def import_nips_sparse_binary(data_file, labels_file, n_fts):
    """Load a sparse-binary data set in the NIPS challenge format.

    Every line of `data_file` lists the space-separated 1-based indices of
    the active features; each instance is expanded to a dense vector of
    `n_fts` floats (1.0 = active, 0.0 = inactive).  When `labels_file` is
    non-empty it supplies one numeric label per line, otherwise the label
    is the placeholder string "?".

    Returns:
        list of data_instance objects.
    """
    with open(data_file, "r") as handle:
        rows = handle.readlines()
    labels = None
    if labels_file != "":
        with open(labels_file, "r") as handle:
            labels = handle.readlines()
    total_set = []
    for row_idx, row in enumerate(rows):
        ## start from an all-zero dense vector and switch on the listed bits
        vals = [0.0] * n_fts
        for token in row.split(" "):
            if token == "\n":  ## trailing newline token, not a feature index
                continue
            vals[int(token) - 1] = 1.0  ## the file's indices are 1-based
        if labels is None:
            label = "?"  ## no labels file: unknown-label placeholder
        else:
            label = float(labels[row_idx])
        total_set.append(data_instance("", vals, label))
    return total_set
예제 #5
0
def import_nips_dense(data_file, labels_file):
    """Load a dense data set in the NIPS challenge format.

    Every line of `data_file` holds space-separated feature values.  When
    `labels_file` is non-empty it supplies one numeric label per line,
    otherwise the label is the placeholder string "?".

    Returns:
        list of data_instance objects.
    """
    with open(data_file, "r") as handle:
        rows = handle.readlines()
    labels = None
    if labels_file != "":
        with open(labels_file, "r") as handle:
            labels = handle.readlines()
    total_set = []
    for row_idx, row in enumerate(rows):
        ## keep every token except the bare trailing-newline one
        vals = [float(token) for token in row.split(" ") if token != "\n"]
        label = "?" if labels is None else float(labels[row_idx])
        total_set.append(data_instance("", vals, label))
    return total_set
예제 #6
0
def import_sonar():
    """Load the sonar data set and split it into train and test sets.

    Reads "sonar.all-data" (feature columns plus a final class column),
    labelling rocks ("R") as 1 and mines as 0, then splits each class into
    train/test according to the index lists in "sonar.rocks" and
    "sonar.mines" — the division published by the data set's owner.

    Returns:
        (train_set, test_set) — two lists of data_instance objects.
    """
    with open("sonar.all-data", "r") as file_read:
        lines = file_read.readlines()

    class_rocks = []
    class_mines = []
    ## read the instances, separated by class and in file order
    for line in lines:
        cols = line.split(",")
        vals = [float(v) for v in cols[:-1]]  ## feature columns
        if "R" in cols[-1]:  ## class of type R (rock), R = 1
            class_rocks.append(data_instance("", vals, 1))
        else:  ## class of type M (mine), M = 0
            class_mines.append(data_instance("", vals, 0))

    train_set = []
    test_set = []

    def _split(instances, test_indices):
        ## instances whose position appears in `test_indices` go to the
        ## test set; everything else goes to the training set
        for idx in range(0, len(instances)):
            if idx in test_indices:
                test_set.append(instances[idx])
            else:
                train_set.append(instances[idx])

    ## train/test division as defined by the owner of the data set
    _split(class_rocks, import_sonar_division("sonar.rocks"))
    _split(class_mines, import_sonar_division("sonar.mines"))

    return train_set, test_set
예제 #7
0
def import_data():
    """Load the utterance data set, caching it in a pickle file.

    When "save_obj.txt" exists, the previously pickled list is loaded from
    it; otherwise the list is rebuilt from the database (one data_instance
    per utterance, class 1 = stressed, class 0 = not stressed) and pickled
    so the next run can skip the DB queries.

    Returns:
        list of data_instance objects.

    Fixes: the `with open(...)` line carried a stray tab in its indentation
    (IndentationError); the stressed/non-stressed loops were duplicated.
    """
    cache_path = "save_obj.txt"
    if os.path.isfile(cache_path):  ## the cache file exists: import from it
        print("importing data from file...")
        with open(cache_path, "rb") as cache_file:
            total_set = pickle.load(cache_file)
    else:  ## no cache yet: recreate the data and the file
        print("creating load file data... (need to test this approach...)")
        total_set = []
        for speaker_id in myDB.getIds():
            if speaker_id not in ids_new_tag:  ## only speakers with the new tag
                continue
            print(speaker_id)
            ## first the stressed (1) then the non-stressed (0) utterances
            for stress_label in (1, 0):
                ## getStressedUts returns (event, starttime) per utterance
                info = myDB.getStressedUts(speaker_id, stress_label,
                                           "Stress_based_on_hf_10sec_window")
                for row in info:
                    ## receives (event, id, starttime) and returns the 6125
                    ## feature values for that utterance
                    ut = myDB.getUtterancesFeatures(row[0], speaker_id,
                                                    int(row[1]))
                    if ut == []:  ## no features: proceed to the next one
                        continue
                    ## duration is 0 because it is not read from the DB
                    total_set.append(
                        data_instance(speaker_id, row[0], int(row[1]), 0, ut,
                                      stress_label))
        with open(cache_path, "wb") as cache_file:
            pickle.dump(total_set, cache_file)
    return total_set