def main():
    # Driver: loads the configured dataset and, for each epoch, feeds every
    # training example of every training map through a SeqToSeq model,
    # accumulating the per-example cost.
    # TODO preprocess the input file to get standard vectors
    configuration = config.get_config()
    filepath = configuration['datafile_path']
    processed_data = DataProcessing.ProcessData(filepath)
    """ Model designing part """
    # TODO design encoder
    for epi in range(configuration['max_epochs']):
        # print "training epoch ", epi #
        err = 0.0  # summed cost over this epoch
        num_steps = 0
        # TODO: shuffle the training data and train this epoch ##
        train_start = time.time()  #
        seq_lang_numpy = []
        seq_world_numpy = []
        seq_action_numpy = []
        for name_map in configuration['maps_train']:
            max_steps = len(processed_data.dict_data['train'][name_map])
            print 'max_steps=', max_steps
            for idx_data, data in enumerate(
                    processed_data.dict_data['train'][name_map]):
                # seq_lang_numpy, seq_world_numpy and seq_action_numpy will be set
                seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data(
                    idx_data, name_map, 'train')
                # np.concatenate((seq_lang_numpy, seq_lang))
                # np.concatenate((seq_world_numpy, seq_world))
                # np.concatenate((seq_action_numpy, seq_action))
                """ trainer = Instantiates the model """
                # NOTE(review): a fresh SeqToSeq is constructed for every
                # example, so no parameters can persist across examples --
                # confirm this is intentional and not a leftover from a
                # symbolic (build-once) API.
                model = models.SeqToSeq()
                cost_numpy = model.build_model(
                    seq_lang_numpy,  # list of word indices
                    seq_world_numpy,  # matrix of dim (len(one_data['cleanpath'])*78
                    seq_action_numpy  # index value of 1 in one hot vector of action
                )
                print "Cost!!------", cost_numpy
                print "type = ", type(cost_numpy)
                print "shape = ", cost_numpy.shape
                print "---Cost_numpy___=", cost_numpy
                err += cost_numpy
                if idx_data % 100 == 99:
                    print "training i-th out of N in map : ", (idx_data, max_steps, name_map)  #
            num_steps += max_steps  #
        # Average cost per map-step for this epoch (currently unused beyond here).
        train_err = err / num_steps
def trainIters(encoder, attn_decoder, n_iters, learning_rate, print_every=1000, plot_every=100): # TODO preprocess the input file to get standard vectors configuration = config.get_config() filepath = configuration['datafile_path'] """divides the data into train and dev""" processed_data = DataProcessing.ProcessData(filepath) run_model = DataProcessing.RunModel() """ Model designing part """ # TODO design encoder # max_action_len = 30 start = time.time() plot_losses = [] print_loss_total = 0 # Reset every print_every plot_loss_total = 0 # Reset every plot_every encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate) decoder_optimizer = optim.SGD(attn_decoder.parameters(), lr=learning_rate) count = 0 folds = configuration['folds'] criterion = nn.NLLLoss() """ Training part """ for epi in range(n_iters): # print "training epoch ", epi # train_err_epoch = [] val_err_epoch = [] accuracy_for_epoch = [] # TODO: shuffle the training data and train this epoch ## train_start = time.time() # for fold in range(folds): print "Fold: ", fold train_err = 0.0 num_steps = 0 seq_lang_numpy = [] seq_world_numpy = [] seq_action_numpy = [] for name_map in configuration['maps_train'][fold]: max_steps = len(processed_data.dict_data['train'][name_map]) print 'max_steps=', max_steps for idx_data, data in enumerate( processed_data.dict_data['train'][name_map]): count += 1 # seq_lang_numpy, seq_world_numpy and seq_action_numpy will be set seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data( idx_data, name_map, 'train') seq_lang_numpy = Variable( torch.LongTensor(seq_lang_numpy).view(-1, 1)) seq_world_numpy = Variable( torch.FloatTensor(seq_world_numpy)) seq_action_numpy = Variable( torch.LongTensor(seq_action_numpy).view(-1, 1)) """ trainer = Instantiates the model """ loss = train(idx_data, name_map, seq_lang_numpy, seq_world_numpy, seq_action_numpy, encoder, attn_decoder, encoder_optimizer, decoder_optimizer, criterion, processed_data, "train", 
run_model) train_err += loss print_loss_total += loss plot_loss_total += loss if idx_data % 100 == 99: print "training i-th out of N in map : ", (idx_data, max_steps, name_map) if count % print_every == 0: print_loss_avg = print_loss_total / print_every print_loss_total = 0 print "----------------calculating training loss------------" print "TimeSince=", time_since(start, count / n_iters) print "Itr=", count print " Percentage of code run=", count / n_iters * 100 print "Loss=", print_loss_avg print "--------------------------------------------" print "" print "" # if idx_data == 20: # break num_steps += max_steps # avg_train_err = train_err / num_steps train_err_epoch.append(avg_train_err) print "validating ... " # val_err = 0.0 num_steps = 0 dev_start = time.time() # for name_map in configuration['maps_train'][fold]: max_steps = len(processed_data.dict_data['dev'][name_map]) for idx_data, data in enumerate( processed_data.dict_data['dev'][name_map]): count += 1 # seq_lang_numpy, seq_world_numpy and seq_action_numpy will be set seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data( idx_data, name_map, 'dev') seq_lang_numpy = Variable( torch.LongTensor(seq_lang_numpy).view(-1, 1)) seq_world_numpy = Variable( torch.FloatTensor(seq_world_numpy)) seq_action_numpy = Variable( torch.LongTensor(seq_action_numpy).view(-1, 1)) """ trainer = Instantiates the model """ loss = train(idx_data, name_map, seq_lang_numpy, seq_world_numpy, seq_action_numpy, encoder, attn_decoder, encoder_optimizer, decoder_optimizer, criterion, processed_data, "dev", run_model) val_err += loss print_loss_total += loss plot_loss_total += loss if idx_data % 100 == 99: print "training i-th out of N in map : ", (idx_data, max_steps, name_map) if count % print_every == 0: print_loss_avg = print_loss_total / print_every print_loss_total = 0 print "----------------calculating validation loss------------" print "TimeSince=", time_since(start, count / n_iters) print "Itr=", 
count print " Percentage of code run=", (count / n_iters) * 100 print "Loss=", print_loss_avg print "--------------------------------------------" print "" print "" # if idx_data == 20: # break num_steps += max_steps avg_val_error = val_err / num_steps val_err_epoch.append(avg_val_error) print "Epoch = ", epi, " Train error = ", avg_train_err, " Validation error = ", avg_val_error, " diff = "\ , avg_train_err - avg_val_error # Add testing code here test_map_name = configuration['map_test'][fold] print "maptest = ", test_map_name cnt_success = 0 tag_split = 'train' cnt, total_tuples1, _, _ = evaluate(encoder, attn_decoder, tag_split, test_map_name, processed_data, run_model) cnt_success += cnt tag_split = 'dev' cnt, total_tuples2, _, _ = evaluate(encoder, attn_decoder, tag_split, test_map_name, processed_data, run_model) cnt_success += cnt accuracy_for_fold = (cnt_success / ((total_tuples1 + total_tuples2) * 1.0)) * 100 accuracy_for_epoch.append(accuracy_for_fold) print "Accuracy:for fold:", fold, "= ", accuracy_for_fold, " %" avg_train_err_epi = (sum(train_err_epoch) / 3.0) avg_val_error_epi = (sum(val_err_epoch) / 3.0) avg_accuracy_epi = (sum(accuracy_for_epoch) / 3.0) print "Average train error for epoch ", epi, ": ", avg_train_err_epi print "Average val error for epoch ", epi, ": ", avg_val_error_epi print "Average accuracy for epoch ", epi, ": ", avg_accuracy_epi print "Train error - val error : ", avg_train_err_epi - avg_val_error_epi # Save the model after every epoch tracks = configuration['save_filepath'] id_process = os.getpid() time_current = datetime.datetime.now().isoformat() tag_model = '_PID=' + str(id_process) + '_TIME=' + time_current path_track = tracks + 'track' + "_3FoldEpoch_" + str( epi) + "_" + tag_model + '/' command_mkdir = 'mkdir -p ' + os.path.abspath(path_track) os.system(command_mkdir) # ENCODER_PATH = path_track + 'encoder.pkl' DECODER_PATH = path_track + 'decoder.pkl' torch.save(encoder, ENCODER_PATH) torch.save(attn_decoder, 
DECODER_PATH)
def evaluate(encoder, decoder, tag_split, max_length=MAX_LENGTH):
    # Greedily decode an action sequence for every example of `tag_split` on
    # the first configured test map, and count how many decoded runs end at
    # the gold goal position.
    #
    # NOTE(review): the 3-fold trainIters in this file calls
    # evaluate(encoder, attn_decoder, tag_split, test_map_name,
    # processed_data, run_model) and unpacks FOUR return values, while this
    # definition takes only (encoder, decoder, tag_split[, max_length]) and
    # returns THREE values -- these look like different revisions; confirm
    # which contract is current before reusing this function.
    configuration = config.get_config()
    filepath = configuration['datafile_path']
    # Always evaluates on the first test map, regardless of caller.
    name_map = configuration['map_test'][0]
    processed_data = DataProcessing.ProcessData(filepath)
    run_model = DataProcessing.RunModel()
    all_actions = []      # decoded action list per example
    all_attentions = []   # attention weights per decoding step, per example
    cnt_success = 0       # examples whose final position matches the gold path end
    for idx_data, data in enumerate(
            processed_data.dict_data[tag_split][name_map]):
        # Gold action indices (argmax of each one-hot action vector).
        # NOTE(review): `actions` is built but never used below.
        actions = []
        for act in data['action']:
            actions.append(np.argmax(act))
        seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data(
            idx_data, name_map, tag_split)
        seq_lang = Variable(torch.LongTensor(seq_lang_numpy).view(-1, 1))
        seq_world = Variable(torch.FloatTensor(seq_world_numpy))
        seq_action = Variable(torch.LongTensor(seq_action_numpy).view(-1, 1))
        input_length = seq_lang.size()[0]
        pos_start, pos_end = processed_data.get_pos(idx_data, name_map,
                                                    tag_split)
        pos_curr = pos_start
        encoder_hidden = encoder.initHidden()
        encoder_outputs = Variable(torch.zeros(max_length,
                                               encoder.hidden_size))
        encoder_outputs = encoder_outputs.cuda(
        ) if use_cuda else encoder_outputs
        # Encode the instruction token by token, keeping every step's output
        # for the attention decoder.
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(seq_lang[ei],
                                                     encoder_hidden)
            # encoder_outputs[ei] is an extra term when compared to that in train function
            encoder_outputs[ei] = encoder_outputs[ei] + encoder_output[0][0]
        # Seed the decoder with the world features of the start position.
        decoder_input = seq_world[0]
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input
        decoder_hidden = encoder_hidden.view(1, 1, encoder.hidden_size)
        decoded_actions = []
        decoder_attentions = torch.zeros(max_length, max_length)
        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            # Greedy decoding: take the highest-scoring action index.
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            pos_curr = run_model.take_one_step(pos_curr, ni)
            # world state of next position
            decoder_input = run_model.get_feat_current_position(
                pos_curr, name_map)
            decoder_input = Variable(torch.FloatTensor([decoder_input]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            if ni == STOP:
                # presumably 3 is the index of the STOP action -- confirm
                decoded_actions.append(3)
                break
            else:
                decoded_actions.append(ni)
        all_actions.append(decoded_actions)
        all_attentions.append(decoder_attentions[:di + 1])
        # Success iff the agent stops at the gold final position.
        if check_position_end(pos_curr, data['cleanpath'][-1]):
            cnt_success += 1
        print "decoded action = ", decoded_actions
    return cnt_success, all_actions, all_attentions
def SampleTest(encoder,
               decoder,
               idx_data,
               sentence,
               map_name,
               max_length=MAX_LENGTH):
    """ idx_data: this is the index number of test data of 'l' map's dev set"""
    # Decode a single dev example: returns the decoded action indices, the
    # attention weights actually used, and the gold path for the example.
    configuration = config.get_config()
    filepath = configuration['datafile_path']
    # NOTE(review): decoding uses configuration['map_test'][0] rather than
    # the `map_name` argument (which is only passed to get_data_tuple) --
    # confirm this is intentional.
    name_map = configuration['map_test'][0]
    processed_data = DataProcessing.ProcessData(filepath)
    run_model = DataProcessing.RunModel()
    idx_data, path = get_data_tuple(idx_data, sentence, processed_data,
                                    map_name)
    all_actions = []
    all_attentions = []
    print "Given instruction: ", sentence
    seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data(
        idx_data, name_map, 'dev')
    seq_lang = Variable(torch.LongTensor(seq_lang_numpy).view(-1, 1))
    seq_world = Variable(torch.FloatTensor(seq_world_numpy))
    seq_action = Variable(torch.LongTensor(seq_action_numpy).view(-1, 1))
    input_length = seq_lang.size()[0]
    pos_start, pos_end = processed_data.get_pos(idx_data, name_map, 'dev')
    pos_curr = pos_start
    encoder_hidden = encoder.initHidden()
    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs
    # Encode the instruction token by token, keeping every step's output for
    # the attention decoder.
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(seq_lang[ei], encoder_hidden)
        # encoder_outputs[ei] is an extra term when compared to that in train function
        encoder_outputs[ei] = encoder_outputs[ei] + encoder_output[0][0]
    # Seed the decoder with the world features of the start position.
    decoder_input = seq_world[0]
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    decoder_hidden = encoder_hidden.view(1, 1, encoder.hidden_size)
    decoded_actions = []
    decoder_attentions = torch.zeros(max_length, max_length)
    for di in range(max_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        decoder_attentions[di] = decoder_attention.data
        # Greedy decoding: take the highest-scoring action index.
        topv, topi = decoder_output.data.topk(1)
        ni = topi[0][0]
        pos_curr = run_model.take_one_step(pos_curr, ni)
        # world state of next position
        decoder_input = run_model.get_feat_current_position(pos_curr, name_map)
        decoder_input = Variable(torch.FloatTensor([decoder_input]))
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input
        if ni == STOP:
            # presumably 3 is the index of the STOP action -- confirm
            decoded_actions.append(3)
            break
        else:
            decoded_actions.append(ni)
    print "decoded action = ", decoded_actions
    return decoded_actions, decoder_attentions[:di + 1], path
def trainIters(encoder, attn_decoder, n_iters, learning_rate, print_every=1000, plot_every=100): # TODO preprocess the input file to get standard vectors configuration = config.get_config() filepath = configuration['datafile_path'] """divides the data into train and dev""" processed_data = DataProcessing.ProcessData(filepath) """ Model designing part """ # TODO design encoder # max_action_len = 30 start = time.time() plot_losses = [] print_loss_total = 0 # Reset every print_every plot_loss_total = 0 # Reset every plot_every encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate) decoder_optimizer = optim.SGD(attn_decoder.parameters(), lr=learning_rate) count = 0 criterion = nn.NLLLoss() """ Training part """ for epi in range(n_iters): # print "training epoch ", epi # train_err = 0.0 num_steps = 0 # TODO: shuffle the training data and train this epoch ## train_start = time.time() # seq_lang_numpy = [] seq_world_numpy = [] seq_action_numpy = [] for name_map in configuration['maps_train'][0]: max_steps = len(processed_data.dict_data['train'][name_map]) print 'max_steps=', max_steps for idx_data, data in enumerate( processed_data.dict_data['train'][name_map]): count += 1 # seq_lang_numpy, seq_world_numpy and seq_action_numpy will be set seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data( idx_data, name_map, 'train') seq_lang_numpy = Variable( torch.LongTensor(seq_lang_numpy).view(-1, 1)) seq_world_numpy = Variable(torch.FloatTensor(seq_world_numpy)) seq_action_numpy = Variable( torch.LongTensor(seq_action_numpy).view(-1, 1)) """ trainer = Instantiates the model """ loss = train(idx_data, name_map, seq_lang_numpy, seq_world_numpy, seq_action_numpy, encoder, attn_decoder, encoder_optimizer, decoder_optimizer, criterion, processed_data, flag="train") train_err += loss print_loss_total += loss plot_loss_total += loss if idx_data % 100 == 99: print "training i-th out of N in map : ", (idx_data, max_steps, name_map) if count % 
print_every == 0: print_loss_avg = print_loss_total / print_every print_loss_total = 0 print "----------------calculating training loss------------" print "TimeSince=", time_since(start, count / n_iters) print "Itr=", count print " Percentage of code run=", count / n_iters * 100 print "Loss=", print_loss_avg print "--------------------------------------------" print "" print "" # if idx_data == 20: # break num_steps += max_steps # avg_train_err = train_err / num_steps print "validating ... " # val_err = 0.0 num_steps = 0 dev_start = time.time() # for name_map in configuration['maps_train'][0]: max_steps = len(processed_data.dict_data['dev'][name_map]) for idx_data, data in enumerate( processed_data.dict_data['dev'][name_map]): count += 1 # seq_lang_numpy, seq_world_numpy and seq_action_numpy will be set seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data( idx_data, name_map, 'dev') seq_lang_numpy = Variable( torch.LongTensor(seq_lang_numpy).view(-1, 1)) seq_world_numpy = Variable(torch.FloatTensor(seq_world_numpy)) seq_action_numpy = Variable( torch.LongTensor(seq_action_numpy).view(-1, 1)) """ trainer = Instantiates the model """ loss = train(idx_data, name_map, seq_lang_numpy, seq_world_numpy, seq_action_numpy, encoder, attn_decoder, encoder_optimizer, decoder_optimizer, criterion, processed_data, flag="validate") val_err += loss print_loss_total += loss plot_loss_total += loss if idx_data % 100 == 99: print "training i-th out of N in map : ", (idx_data, max_steps, name_map) if count % print_every == 0: print_loss_avg = print_loss_total / print_every print_loss_total = 0 print "----------------calculating validation loss------------" print "TimeSince=", time_since(start, count / n_iters) print "Itr=", count print " Percentage of code run=", (count / n_iters) * 100 print "Loss=", print_loss_avg print "--------------------------------------------" print "" print "" # if idx_data == 20: # break num_steps += max_steps avg_val_error = 
val_err / num_steps print "Epoch = ", epi, " Train error = ", avg_train_err, " Validation error = ", avg_val_error tracks = configuration['save_filepath'] id_process = os.getpid() time_current = datetime.datetime.now().isoformat() tag_model = '_PID=' + str(id_process) + '_TIME=' + time_current path_track = tracks + 'track' + "_Global_Epoch_" + str( epi) + "_" + tag_model + '/' command_mkdir = 'mkdir -p ' + os.path.abspath(path_track) os.system(command_mkdir) # ENCODER_PATH = path_track + 'encoder.pkl' DECODER_PATH = path_track + 'decoder.pkl' torch.save(encoder, ENCODER_PATH) torch.save(attn_decoder, DECODER_PATH)
def StoreData():
    """Synchronise fetched hospital CSV exports with the Patients collection.

    If the collection already holds documents, each fetched file is matched
    against the stored patients via DataProcessing.ProcessData: files with no
    matches are inserted wholesale as new documents, while matched rows have
    their first history entry pushed onto the stored patient's history.  If
    the collection is empty, the first fetched file seeds it.

    Returns:
        True on completion.
    """
    # Base directory of the fetch step's CSV output; the long literal was
    # duplicated at both read sites, so it is hoisted here once.
    fetched_dir = ('/home/bizzzzzzzzzzzzu/Music/MedicalPortal/'
                   'MedicPortal DataProcessing/FetchedData/')
    patientDBSize = PatientDatabase.count_documents({})
    print(patientDBSize)
    if patientDBSize > 0:
        print('Patient COllection filled', patientDBSize)
        # Get all patient document From the database and change to Panda DataFrame
        patientData = PatientDatabase.find({})
        data = pd.DataFrame(list(patientData))
        for item in files_list:
            result = pd.read_csv(fetched_dir + item)
            # Link the datas if there are any matches between the stored and
            # the fetched
            matchedResult = DataProcessing.ProcessData(data, item)
            if matchedResult is None:
                print('None is Matched')
                '''Create New Document in Patients Collection'''
                # remove the _id column so no redendency appear
                del result['_id']
                PatientDatabase.insert_many(result.to_dict('records'))
            else:
                # Update the Patient Document
                print('Possible Matches', matchedResult, item)
                findPatientData = PatientDatabase.find({})
                for matchList in matchedResult:
                    # Get the history from the left (fetched) data frame.
                    history = json.loads(result.iloc[matchList[1]]['history'])
                    # Renamed from `id`, which shadowed the builtin.
                    patient_id = findPatientData[matchList[0]]['_id']
                    PatientDatabase.find_one_and_update(
                        {'_id': patient_id},
                        {'$push': {
                            'history': history[0]
                        }},
                        upsert=True)
    else:
        # Empty collection: seed it from the first fetched file.
        hospitalData = pd.read_csv(fetched_dir + files_list[0])
        del hospitalData['_id']
        print(type(hospitalData))
        # Re-parse each row's JSON history string into a one-element list so
        # Mongo stores it as an array.
        for i in range(0, len(hospitalData)):
            history = json.loads(hospitalData.iloc[i]['history'])
            hospitalData.loc[i, 'history'] = [history]
            print('EDITED LIST', hospitalData.iloc[i])
        PatientDatabase.insert_many(hospitalData.to_dict('records'))
    return True
def trainIters(encoder,
               attn_decoder,
               n_iters,
               learning_rate,
               print_every=2,
               plot_every=100):
    # Single-split training/validation loop over configuration['maps_train'][0].
    # NOTE(review): this variant still contains live debug `break`s after 21
    # examples per map, prints loss every 2 examples, and (unlike the sibling
    # trainIters definitions) never reports averaged errors or saves the
    # models afterwards -- it looks like a work-in-progress copy; confirm
    # before relying on it.
    # TODO preprocess the input file to get standard vectors
    configuration = config.get_config()
    filepath = configuration['datafile_path']
    """divides the data into train and dev"""
    processed_data = DataProcessing.ProcessData(filepath)
    """ Model designing part """
    # TODO design encoder
    # max_action_len = 30
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(attn_decoder.parameters(), lr=learning_rate)
    count = 0
    criterion = nn.NLLLoss()
    """ Training part """
    for epi in range(n_iters):
        # print "training epoch ", epi #
        err = 0.0  # NOTE(review): never accumulated below, so train_err stays 0
        num_steps = 0
        # TODO: shuffle the training data and train this epoch ##
        train_start = time.time()  #
        seq_lang_numpy = []
        seq_world_numpy = []
        seq_action_numpy = []
        for name_map in configuration['maps_train'][0]:
            max_steps = len(processed_data.dict_data['train'][name_map])
            print 'max_steps=', max_steps
            for idx_data, data in enumerate(
                    processed_data.dict_data['train'][name_map]):
                count += 1
                # seq_lang_numpy, seq_world_numpy and seq_action_numpy will be set
                seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data(
                    idx_data, name_map, 'train')
                seq_lang_numpy = Variable(
                    torch.LongTensor(seq_lang_numpy).view(-1, 1))
                seq_world_numpy = Variable(torch.FloatTensor(seq_world_numpy))
                seq_action_numpy = Variable(
                    torch.LongTensor(seq_action_numpy).view(-1, 1))
                """ trainer = Instantiates the model """
                # One optimisation step on this single example.
                loss = train(idx_data, name_map, seq_lang_numpy,
                             seq_world_numpy, seq_action_numpy, encoder,
                             attn_decoder, encoder_optimizer,
                             decoder_optimizer, criterion, processed_data,
                             flag="train")
                print_loss_total += loss
                plot_loss_total += loss
                if idx_data % 100 == 99:
                    print "training i-th out of N in map : ", (
                        idx_data, max_steps, name_map)
                if count % print_every == 0:
                    print_loss_avg = print_loss_total / print_every
                    print_loss_total = 0
                    print "----------------calculating training loss------------"
                    # NOTE(review): count / n_iters is integer division in
                    # Python 2 -- likely truncates; confirm intent.
                    print "TimeSince=", time_since(start, count / n_iters)
                    print "Itr=", count
                    print " Percentage of code run=", count / n_iters * 100
                    print "Loss=", print_loss_avg
                    print "--------------------------------------------"
                    print ""
                    print ""
                if idx_data == 20:
                    break  # debug-only early exit after 21 examples
            num_steps += max_steps  #
        train_err = err / num_steps
        print "validating ... "  #
        err = 0.0
        num_steps = 0
        dev_start = time.time()  #
        for name_map in configuration['maps_train'][0]:
            max_steps = len(processed_data.dict_data['dev'][name_map])
            for idx_data, data in enumerate(
                    processed_data.dict_data['dev'][name_map]):
                count += 1
                # seq_lang_numpy, seq_world_numpy and seq_action_numpy will be set
                seq_lang_numpy, seq_world_numpy, seq_action_numpy = processed_data.process_one_data(
                    idx_data, name_map, 'dev')
                seq_lang_numpy = Variable(
                    torch.LongTensor(seq_lang_numpy).view(-1, 1))
                seq_world_numpy = Variable(torch.FloatTensor(seq_world_numpy))
                seq_action_numpy = Variable(
                    torch.LongTensor(seq_action_numpy).view(-1, 1))
                """ trainer = Instantiates the model """
                loss = train(idx_data, name_map, seq_lang_numpy,
                             seq_world_numpy, seq_action_numpy, encoder,
                             attn_decoder, encoder_optimizer,
                             decoder_optimizer, criterion, processed_data,
                             flag="validate")
                print_loss_total += loss
                plot_loss_total += loss
                if idx_data % 100 == 99:
                    print "training i-th out of N in map : ", (
                        idx_data, max_steps, name_map)
                if count % print_every == 0:
                    print_loss_avg = print_loss_total / print_every
                    print_loss_total = 0
                    print "----------------calculating validation loss------------"
                    print "TimeSince=", time_since(start, count / n_iters)
                    print "Itr=", count
                    print " Percentage of code run=", (count / n_iters) * 100
                    print "Loss=", print_loss_avg
                    print "--------------------------------------------"
                    print ""
                    print ""
                if idx_data == 20:
                    break  # debug-only early exit after 21 examples
            num_steps += max_steps