Python DataLoader 예제들, lib.data_loader.DataLoader Python 예제들

예제 #1

0

파일 보기

파일: fetch_and_dump_to_csv.py 프로젝트: Daniel57910/udac_data_warehouse

def load_dataframe_from_files(file_list):
    """Build one dataframe from the files named in ``file_list``.

    ``file_list`` is handed to a ``DataLoader`` and the result of its
    ``create_dataframe_from_files()`` call is returned unchanged.  Per the
    original author's note, the list comes from the file finder and the
    resulting dataframe is used for both song and log data.
    """
    return DataLoader(file_list).create_dataframe_from_files()

예제 #2

0

파일 보기

파일: song_etl.py 프로젝트: hahalulu/psql_sparkify

def song_etl():
    """Load the song JSON files and batch-insert artist and song rows.

    Every ``*.json`` file under ``<cwd>/data/song_data/`` is loaded through
    ``DataLoader.create_json_from_files()``.  Two column subsets are taken
    from the result with ``DataFilter.return_unique_dataframe_subset`` and
    each is written with ``DatabaseWrapper.execute_batch_query`` using the
    ``artist_insert`` / ``song_insert`` entries of ``query``.
    """
    songs_dir = os.getcwd() + '/data/song_data/'
    json_files = FileFinder(songs_dir, '*.json').return_file_names()
    song_filter = DataFilter(DataLoader(json_files).create_json_from_files())

    # First list: columns to keep.  Second list: presumably the columns that
    # define uniqueness -- confirm against DataFilter.
    artist_rows = song_filter.return_unique_dataframe_subset(
        [
            'artist_id',
            'artist_name',
            'artist_location',
            'artist_longitude',
            'artist_latitude',
        ],
        ['artist_id', 'artist_name'],
    )
    song_rows = song_filter.return_unique_dataframe_subset(
        ['song_id', 'title', 'year', 'duration', 'artist_name'],
        ['song_id', 'title'],
    )

    db = DatabaseWrapper()

    # Artists are inserted before songs, matching the original order.
    for query_key, subset in (
        ('artist_insert', artist_rows),
        ('song_insert', song_rows),
    ):
        db.execute_batch_query(
            query[query_key],
            list(subset.itertuples(index=False, name=None)),
        )

예제 #3

0

파일 보기

def log_etl():
    """Load the log JSON files and batch-insert user, timestamp and songplay rows.

    Every ``*.json`` file under ``<cwd>/data/log_data/`` is loaded through
    ``DataLoader.create_json_from_files()``.  Three column subsets are taken
    with ``DataFilter.return_unique_dataframe_subset`` and written with
    ``DatabaseWrapper.execute_batch_query``.  Timestamp rows are passed
    through ``unpack_timestamp`` before insertion.
    """
    logs_dir = os.getcwd() + '/data/log_data/'
    json_files = FileFinder(logs_dir, '*.json').return_file_names()
    log_filter = DataFilter(DataLoader(json_files).create_json_from_files())

    user_columns = ['firstName', 'lastName', 'gender', 'level']
    user_key = ['firstName', 'lastName']
    timestamp_columns = ['ts', 'firstName', 'lastName']
    # NOTE(review): 'artist' is listed twice here.  It is kept as-is because
    # the 'songplay_insert' query (not visible here) may rely on this column
    # count -- confirm whether the duplicate is intentional.
    songplay_columns = [
        'ts', 'firstName', 'lastName', 'level', 'song', 'artist', 'artist',
        'sessionId',
    ]

    users = log_filter.return_unique_dataframe_subset(user_columns, user_key)
    timestamps = log_filter.return_unique_dataframe_subset(timestamp_columns)
    songplays = log_filter.return_unique_dataframe_subset(songplay_columns)

    db = DatabaseWrapper()

    db.execute_batch_query(
        query['user_insert'],
        list(users.itertuples(index=False, name=None)),
    )

    # Each raw timestamp tuple is converted by unpack_timestamp first.
    db.execute_batch_query(
        query['timestamp_insert'],
        [
            unpack_timestamp(row)
            for row in timestamps.itertuples(name=None, index=False)
        ],
    )

    db.execute_batch_query(
        query['songplay_insert'],
        list(songplays.itertuples(index=False, name=None)),
    )

예제 #4

0

파일 보기

파일: create_aggregate_csv.py 프로젝트: rajsingh7/Data-Engineering

def create_aggregate_csv():
    """Combine the event CSV files and write three filtered CSV extracts.

    All ``*.csv`` files under ``<cwd>/event_data`` are loaded into one
    dataframe.  Rows are then selected with boolean masks built by applying
    ``has_hashable_key`` to individual columns, and three files are written
    to the current working directory (without the index column):

    * ``event_datafile_new.csv`` -- rows passing on ``itemInSession`` and
      ``sessionId``;
    * ``user_and_session.csv`` -- rows passing on ``userId`` and
      ``sessionId``;
    * ``user_and_song.csv`` -- rows passing on ``song``.
    """
    event_columns = [
        'sessionId', 'itemInSession', 'artist', 'firstName', 'gender',
        'lastName', 'length', 'level', 'location', 'song', 'userId',
    ]
    user_columns = [
        'userId', 'sessionId', 'artist', 'firstName', 'gender',
        'itemInSession', 'lastName', 'length', 'level', 'location', 'song',
    ]
    user_and_song_columns = ['song', 'firstName', 'lastName']

    source_dir = os.getcwd() + '/event_data'
    events_out = os.getcwd() + '/event_datafile_new.csv'
    sessions_out = os.getcwd() + '/user_and_session.csv'
    songs_out = os.getcwd() + '/user_and_song.csv'

    csv_files = FileFinder(source_dir, '*.csv').return_file_names()
    combined = DataLoader(csv_files).create_dataframe_from_files()

    # Masks are built in the same order, and with the same number of
    # has_hashable_key passes, as the original expressions.
    item_ok = combined.itemInSession.apply(has_hashable_key)
    session_ok_for_events = combined.sessionId.apply(has_hashable_key)
    events = combined[item_ok & session_ok_for_events]

    user_ok = combined.userId.apply(has_hashable_key)
    session_ok_for_users = combined.sessionId.apply(has_hashable_key)
    users = combined[user_ok & session_ok_for_users]

    song_ok = combined.song.apply(has_hashable_key)
    songs = combined[song_ok]

    # Every frame is computed before the first file is written.
    outputs = (
        (events, event_columns, events_out),
        (users, user_columns, sessions_out),
        (songs, user_and_song_columns, songs_out),
    )
    for frame, columns, destination in outputs:
        frame[columns].to_csv(path_or_buf=destination, index=False)

예제 #5

0

파일 보기

파일: s3_to_gzip.py 프로젝트: Daniel57910/udac_airflow

def s3_to_gzip(data_type, columns, path):
    """Sync one S3 dataset locally and write it out as staging files.

    Steps:

    1. ``aws s3 sync s3://udacity-dend/<data_type>`` into
       ``<path>/tmp/<data_type>``.
    2. Load every ``*.json`` file found there into a single dataframe.
    3. Call ``clean_dataframe_of_non_alphanumeric_characters`` on the
       dataframe for ``columns``.  Its return value is ignored, so it
       presumably cleans in place -- confirm against the helper.
    4. Write ``<path>/staging/<data_type>.gz`` (gzip, no header) and
       ``<path>/staging/<data_type>.csv`` (with header), both without the
       index.

    Args:
        data_type: Dataset name; used as the S3 key prefix and as the
            output file stem.
        columns: Columns passed to the cleaning helper.
        path: Base working directory for the ``tmp`` and ``staging`` folders.

    Raises:
        subprocess.CalledProcessError: If the ``aws`` command exits non-zero.
    """
    logger = logging.getLogger(__name__)

    temp_path = path + f'/tmp/{data_type}'
    staging_path = path + '/staging'
    logger.info('Syncing %s to %s', data_type, temp_path)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(temp_path, exist_ok=True)

    # An argument list (no shell) keeps spaces or shell metacharacters in
    # data_type / path from being interpreted by a shell.
    subprocess.run(
        ['aws', 's3', 'sync', f's3://udacity-dend/{data_type}', temp_path],
        check=True,
    )

    file_finder = FileFinder(temp_path + '/', '*.json')
    file_names = list(file_finder.return_file_names())

    dataframe = DataLoader(file_names).create_dataframe_from_files()

    clean_dataframe_of_non_alphanumeric_characters(dataframe, columns)

    logger.info('saving %s staging file to %s', data_type, staging_path)
    os.makedirs(staging_path, exist_ok=True)

    # Compressed, header-less copy.
    dataframe.to_csv(
        staging_path + f'/{data_type}.gz',
        header=False,
        index=False,
        compression='gzip',
    )

    # Plain CSV copy with a header row.
    dataframe.to_csv(
        staging_path + f'/{data_type}.csv',
        header=True,
        index=False,
    )

예제 #6

0

파일 보기

파일: test.py 프로젝트: YuanFeis/Hand-Gesture-Recognition

def main(args):
    """Run the CNN3D-lite network over the test set and save the predictions.

    All settings are read from the module-level ``config`` object; ``args``
    is accepted but not used here.  The function builds a test generator
    from ``DataLoader.test_df``, loads the weights named by ``path_weights``
    (exiting if that setting is the string ``"None"``), predicts, converts
    each prediction row to a label with ``data.categorical_to_label`` and
    writes ``prediction.csv`` (``;``-separated, no header or index) into the
    model directory.
    """
    general, paths, option = 'general', 'path', 'option'

    # Settings are read in the original order, including the ones that are
    # not used afterwards, so a missing key still fails at the same point.
    clip_length = config.getint(general, 'nb_frames')
    frame_stride = config.getint(general, 'skip')
    frame_size = literal_eval(config.get(general, 'target_size'))
    batch = config.getint(general, 'batch_size')
    epochs = config.getint(general, 'epochs')  # unused in this function
    class_count = config.getint(general, 'nb_classes')

    model_name = config.get(paths, 'model_name')
    root_dir = config.get(paths, 'data_root')
    model_subdir = config.get(paths, 'data_model')
    video_subdir = config.get(paths, 'data_vid')

    weights_file = config.get(paths, 'path_weights')

    labels_csv = config.get(paths, 'csv_labels')
    test_csv = config.get(paths, 'csv_test')

    worker_count = config.getint(option, 'workers')
    multiprocess = config.getboolean(option, 'use_multiprocessing')
    max_queue_size = config.getint(option, 'max_queue_size')  # unused here

    # Absolute locations derived from the data root.
    video_dir = os.path.join(root_dir, video_subdir)
    output_dir = os.path.join(root_dir, model_subdir, model_name)
    labels_path = os.path.join(root_dir, labels_csv)
    test_path = os.path.join(root_dir, test_csv)

    # (frames, *target_size, 3): the trailing 3 is presumably the colour
    # channel count -- confirm against the model definition.
    input_shape = (clip_length, ) + frame_size + (3, )

    data = DataLoader(video_dir, labels_path, path_test=test_path)

    # Test generator: unshuffled and without a class mode.
    test_flow_options = dict(
        shuffle=False,
        path_classes=labels_path,
        class_mode=None,
        x_col='video_id',
        target_size=frame_size,
        batch_size=batch,
        nb_frames=clip_length,
        skip=frame_stride,
        has_ext=True,
    )
    test_flow = kmg.ImageDataGenerator().flow_video_from_dataframe(
        data.test_df, video_dir, **test_flow_options)

    # build and compile RESNET3D model
    # net = Resnet3DBuilder.build_resnet_101(inp_shape, nb_classes, drop_rate=0.5)

    net = model.CNN3D_lite(inp_shape=input_shape, nb_classes=class_count)

    # Weights are mandatory in test mode.
    if weights_file == "None":
        sys.exit(
            "<Error>: Specify a value for path_weights different from None when using test mode"
        )
    print("Loading weights from : " + weights_file)
    net.load_weights(weights_file)

    sample_count = data.test_df["video_id"].size

    predictions = net.predict_generator(
        generator=test_flow,
        steps=ceil(sample_count / batch),
        verbose=1,
        workers=worker_count,
        use_multiprocessing=multiprocess,
    )

    # Start with an empty label column, then fill it row by row.
    data.test_df['label'] = ""

    for row_index, scores in enumerate(predictions):
        # Binarise the row in place: maximum entries become 1, the rest 0.
        scores[scores == np.max(scores)] = 1
        scores[scores != np.max(scores)] = 0
        data.test_df.at[row_index, 'label'] = data.categorical_to_label(scores)

    data.test_df.to_csv(os.path.join(output_dir, "prediction.csv"),
                        sep=';',
                        header=False,
                        index=False)

예제 #7

0

파일 보기

def main(args):
    """Train or evaluate the 3D ResNet-101 network, depending on ``mode``.

    All settings are read from the module-level ``config`` object; ``args``
    is accepted but not used here.

    * ``mode == 'train'``: builds training and validation generators,
      compiles the network with SGD, optionally loads existing weights and
      fits it, checkpointing the best ``val_accuracy`` to
      ``model.best.hdf5`` in the model directory.
    * ``mode == 'test'``: loads the weights (required), predicts over the
      test generator and writes ``prediction.csv`` into the model directory.
    * any other value: exits with an error message.
    """
    general, paths = 'general', 'path'

    # Every setting is read up front, in the original order.
    run_mode = config.get(general, 'mode')
    clip_length = config.getint(general, 'nb_frames')
    frame_stride = config.getint(general, 'skip')
    frame_size = literal_eval(config.get(general, 'target_size'))
    batch = config.getint(general, 'batch_size')
    epoch_count = config.getint(general, 'epochs')
    class_count = config.getint(general, 'nb_classes')

    model_name = config.get(paths, 'model_name')
    root_dir = config.get(paths, 'data_root')
    model_subdir = config.get(paths, 'data_model')
    video_subdir = config.get(paths, 'data_vid')

    weights_file = config.get(paths, 'path_weights')

    labels_csv = config.get(paths, 'csv_labels')
    train_csv = config.get(paths, 'csv_train')
    val_csv = config.get(paths, 'csv_val')
    test_csv = config.get(paths, 'csv_test')

    # Locations derived from the data root.
    video_dir = os.path.join(root_dir, video_subdir)
    output_dir = os.path.join(root_dir, model_subdir, model_name)
    labels_path = os.path.join(root_dir, labels_csv)
    train_path = os.path.join(root_dir, train_csv)
    val_path = os.path.join(root_dir, val_csv)
    test_path = os.path.join(root_dir, test_csv)

    # (frames, *target_size, 3) input tensor shape.
    input_shape = (clip_length, ) + frame_size + (3, )

    # Generator options common to the train, validation and test flows.
    shared_flow_options = dict(
        path_classes=labels_path,
        x_col='video_id',
        target_size=frame_size,
        batch_size=batch,
        nb_frames=clip_length,
        skip=frame_stride,
        has_ext=True,
    )

    if run_mode not in ('train', 'test'):
        sys.exit("<Error>: Use either {train,test} mode")

    if run_mode == 'train':
        data = DataLoader(video_dir, labels_path, train_path, val_path)

        # Output folders for the model and its history graphs.
        graphs_dir = os.path.join(output_dir, "graphs")
        mkdirs(output_dir, 0o755)
        mkdirs(graphs_dir, 0o755)

        flow_factory = kmg.ImageDataGenerator()
        train_flow = flow_factory.flow_video_from_dataframe(
            data.train_df, video_dir, y_col="label", **shared_flow_options)
        val_flow = flow_factory.flow_video_from_dataframe(
            data.val_df, video_dir, y_col="label", **shared_flow_options)

        net = Resnet3DBuilder.build_resnet_101(input_shape,
                                               class_count,
                                               drop_rate=0.5)
        optimizer = SGD(lr=0.01, momentum=0.9, decay=0.0001, nesterov=False)
        net.compile(optimizer=optimizer,
                    loss="categorical_crossentropy",
                    metrics=["accuracy"])

        # Loading weights is optional when training.
        if weights_file != "None":
            print("Loading weights from : " + weights_file)
            net.load_weights(weights_file)

        best_checkpoint = ModelCheckpoint(
            os.path.join(output_dir, 'model.best.hdf5'),
            monitor='val_accuracy',
            verbose=1,
            save_best_only=True,
            mode='max')
        history_graph = HistoryGraph(model_path_name=graphs_dir)

        train_count = data.train_df["video_id"].size
        val_count = data.val_df["video_id"].size

        net.fit(
            train_flow,
            steps_per_epoch=ceil(train_count / batch),
            epochs=epoch_count,
            validation_data=val_flow,
            validation_steps=ceil(val_count / batch),
            shuffle=True,
            verbose=1,
            callbacks=[best_checkpoint, history_graph],
        )
        return

    # ---- test mode ----
    data = DataLoader(video_dir, labels_path, path_test=test_path)

    test_flow = kmg.ImageDataGenerator().flow_video_from_dataframe(
        data.test_df,
        video_dir,
        shuffle=False,
        class_mode=None,
        **shared_flow_options)

    net = Resnet3DBuilder.build_resnet_101(input_shape, class_count)

    # Weights are mandatory in test mode.
    if weights_file == "None":
        sys.exit(
            "<Error>: Specify a value for path_weights different from None when using test mode"
        )
    print("Loading weights from : " + weights_file)
    net.load_weights(weights_file)

    sample_count = data.test_df["video_id"].size

    predictions = net.predict(test_flow,
                              steps=ceil(sample_count / batch),
                              verbose=1)

    # Start with an empty label column, then fill it row by row.
    data.test_df['label'] = ""

    for row_index, scores in enumerate(predictions):
        # Binarise the row in place: maximum entries become 1, the rest 0.
        scores[scores == np.max(scores)] = 1
        scores[scores != np.max(scores)] = 0
        data.test_df.at[row_index, 'label'] = data.categorical_to_label(scores)

    data.test_df.to_csv(os.path.join(output_dir, "prediction.csv"),
                        sep=';',
                        header=False,
                        index=False)

예제 #8

0

파일 보기

파일: inference.py 프로젝트: liCCcccs/PV_power

 def __init__(self, arguments):
     """Store the given arguments and create a fresh ``DataLoader``.

     A debug line naming the concrete class and the arguments is logged
     before any state is assigned.
     """
     logger.debug("Initializing %s: (args: %s)", type(self).__name__, arguments)
     self._args, self._data = arguments, DataLoader()

예제 #9

0

파일 보기

# Open capture device 0.
cap = cv2.VideoCapture(0)

# Numeric capture-property ids and the values applied to them, in the
# original order.
# NOTE(review): in OpenCV's VideoCaptureProperties numbering these look like
# 3 = frame width, 4 = frame height, 11 = contrast, 12 = saturation --
# confirm against the installed OpenCV version.
for prop_id, prop_value in ((3, 960), (4, 640), (11, 0), (12, 100)):
    cap.set(prop_id, prop_value)

# Build the network and load its weights (mandatory here).
path_model = os.path.join(data_root, data_model, model_name)
inp_shape = (nb_frames,) + target_size + (3,)
net = Resnet3DBuilder.build_resnet_101(inp_shape, nb_classes)
if path_weights == "None":
    sys.exit("<Error>: Specify a value for path_weights different from None when using test mode")
print("Loading weights from : " + path_weights)
net.load_weights(path_weights)
data = DataLoader(path_vid, path_labels)

# Read every row of the labels CSV into an immutable tuple of rows.
with open('./data/csv_files/jester-v1-labels.csv') as f:
    label_list = tuple(csv.reader(f))

queue = frame_queue(nb_frames, target_size)

# Capture loop: stop the program as soon as a frame cannot be read.
# NOTE(review): the loop body appears to be truncated in this excerpt; only
# the colour conversion of each frame is visible.
while True:
    ret, frame = cap.read()
    if not ret:
        sys.exit('<Error> can not capture video')

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

예제 #10

0

파일 보기

파일: main.py 프로젝트: tranvangaohd1994/gesture_recognition

def main_predict_on_thread():
    """Train, test, or run live camera gesture recognition, based on ``mode``.

    Settings are read from ``config.cfg`` in the current working directory.
    The ``mode`` value in the ``general`` section selects the branch:

    * ``'train'``: build generators, compile a ResNet-101 3D network with
      SGD, optionally load weights, and call ``fit_generator``.
    * ``'test'``: build the test generator and display frames from its
      first batches.  NOTE(review): this branch calls ``exit()`` right after
      that preview loop, so the model building, prediction and CSV export
      below it never run as written.
    * ``'video'``: open camera 0 and show frame-difference debug windows.
      NOTE(review): the capture loop contains an unconditional ``continue``,
      so the frame buffering and prediction code after it never runs as
      written.
    * anything else: exit with an error message.
    """
    config = configparser.ConfigParser()
    config.read('config.cfg')
    args = config  # alias of the parser; not referenced again in this function
    #Extracting the information from the configuration file
    mode = config.get('general', 'mode')
    nb_frames = config.getint('general', 'nb_frames')
    skip = config.getint('general', 'skip')
    target_size = literal_eval(config.get('general', 'target_size'))
    batch_size = config.getint('general', 'batch_size')
    epochs = config.getint('general', 'epochs')
    nb_classes = config.getint('general', 'nb_classes')

    model_name = config.get('path', 'model_name')
    data_root = config.get('path', 'data_root')
    data_model = config.get('path', 'data_model')
    data_vid = config.get('path', 'data_vid')

    path_weights = config.get('path', 'path_weights')

    csv_labels = config.get('path', 'csv_labels')
    csv_train = config.get('path', 'csv_train')
    csv_val = config.get('path', 'csv_val')
    csv_test = config.get('path', 'csv_test')

    workers = config.getint('option', 'workers')
    use_multiprocessing = config.getboolean('option', 'use_multiprocessing')
    max_queue_size = config.getint('option', 'max_queue_size')

    #Joining together the needed paths
    path_vid = os.path.join(data_root, data_vid)
    # model_name is read above but is not part of this path.
    path_model = os.path.join(data_root, data_model)
    path_labels = os.path.join(data_root, csv_labels)
    path_train = os.path.join(data_root, csv_train)
    path_val = os.path.join(data_root, csv_val)
    path_test = os.path.join(data_root, csv_test)

    #Input shape of the input Tensor
    #inp_shape = (None, None, None, 3)
    inp_shape = (nb_frames, ) + target_size + (3, )
    # Toggles the debug windows in 'video' mode.
    img_show_camera = True

    if mode == 'train':
        data = DataLoader(path_vid, path_labels, path_train, path_val)

        #Creating the model and graph folder
        mkdirs(path_model, 0o755)
        mkdirs(os.path.join(path_model, "graphs"), 0o755)

        #Creating the generators for the training and validation set
        gen = kmg.ImageDataGenerator()
        gen_train = gen.flow_video_from_dataframe(data.train_df,
                                                  path_vid,
                                                  path_classes=path_labels,
                                                  x_col='video_id',
                                                  y_col="label",
                                                  target_size=target_size,
                                                  batch_size=batch_size,
                                                  nb_frames=nb_frames,
                                                  skip=skip,
                                                  has_ext=True)
        gen_val = gen.flow_video_from_dataframe(data.val_df,
                                                path_vid,
                                                path_classes=path_labels,
                                                x_col='video_id',
                                                y_col="label",
                                                target_size=target_size,
                                                batch_size=batch_size,
                                                nb_frames=nb_frames,
                                                skip=skip,
                                                has_ext=True)
        # The string literal below is a disabled alternative (CNN3D with
        # Adadelta); as a bare expression statement it has no effect.
        """
        #Building model
        net = model.CNN3D(inp_shape=inp_shape,nb_classes=nb_classes, drop_rate=0.5)
        #Compiling model 
        net.compile(optimizer="Adadelta",
                   loss="categorical_crossentropy",
                   metrics=["accuracy", "top_k_categorical_accuracy"]) 
        """

        net = Resnet3DBuilder.build_resnet_101(inp_shape,
                                               nb_classes,
                                               drop_rate=0.5)
        opti = SGD(lr=0.01, momentum=0.9, decay=0.0001, nesterov=False)
        net.compile(optimizer=opti,
                    loss="categorical_crossentropy",
                    metrics=["accuracy"])

        # Loading weights is optional when training.
        if (path_weights != "None"):
            print("Loading weights from : " + path_weights)
            net.load_weights(path_weights)

        #model_file_format_last = os.path.join(path_model,'model.{epoch:03d}.hdf5')
        model_file_format_best = os.path.join(path_model, 'model.best.hdf5')

        # NOTE(review): monitors 'val_acc'; whether the metric is logged
        # under that name or 'val_accuracy' depends on the Keras version --
        # confirm.
        checkpointer_best = ModelCheckpoint(model_file_format_best,
                                            monitor='val_acc',
                                            verbose=1,
                                            save_best_only=True,
                                            mode='max')
        #checkpointer_last = ModelCheckpoint(model_file_format_last, period=1)

        history_graph = HistoryGraph(
            model_path_name=os.path.join(path_model, "graphs"))

        #Get the number of sample in the training and validation set
        nb_sample_train = data.train_df["video_id"].size
        nb_sample_val = data.val_df["video_id"].size

        #Launch the training
        net.fit_generator(
            generator=gen_train,
            steps_per_epoch=ceil(nb_sample_train / batch_size),
            epochs=epochs,
            validation_data=gen_val,
            validation_steps=ceil(nb_sample_val / batch_size),
            shuffle=True,
            verbose=1,
            workers=workers,
            max_queue_size=max_queue_size,
            use_multiprocessing=use_multiprocessing,
            callbacks=[checkpointer_best, history_graph],
        )

    elif mode == 'test':
        data = DataLoader(path_vid, path_labels, path_test=path_test)
        gen = kmg.ImageDataGenerator()
        gen_test = gen.flow_video_from_dataframe(data.test_df,
                                                 path_vid,
                                                 shuffle=False,
                                                 path_classes=path_labels,
                                                 class_mode=None,
                                                 x_col='video_id',
                                                 target_size=target_size,
                                                 batch_size=batch_size,
                                                 nb_frames=nb_frames,
                                                 skip=skip,
                                                 has_ext=True)

        #test flow data
        # Preview: for up to 10 batches, show 16 frames of the first clip,
        # waiting for a key press after each frame.
        co = 0
        for fl in gen_test:
            co += 1
            if co > 10: break
            print(fl.shape)
            for i in range(16):
                cv2.imshow('frame', fl[0][i])
                cv2.waitKey()
        # NOTE(review): exit() ends the process here, so everything below in
        # this branch is unreachable as written.
        exit()

        #Building model
        net = Resnet3DBuilder.build_resnet_50(inp_shape, nb_classes)

        if (path_weights != "None"):
            print("Loading weights from : " + path_weights)
            net.load_weights(path_weights)
        else:
            sys.exit(
                "<Error>: Specify a value for path_weights different from None when using test mode"
            )

        #Get the number of sample in the test set
        nb_sample_test = data.test_df["video_id"].size

        res = net.predict_generator(
            generator=gen_test,
            steps=ceil(nb_sample_test / batch_size),
            verbose=1,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
        )

        #Create an empty column called label
        data.test_df['label'] = ""

        #For each result get the string label and set it in the DataFrame
        for i, item in enumerate(res):
            # Binarise the row in place: maximum entries become 1, the rest 0.
            item[item == np.max(item)] = 1
            item[item != np.max(item)] = 0
            label = data.categorical_to_label(item)

            #data.test_df.iloc[i,data.test_df.columns.get_loc('label')] = label
            data.test_df.at[i, 'label'] = label  #Faster than iloc

        #Save the resulting DataFrame to a csv
        data.test_df.to_csv(os.path.join(path_model, "prediction.csv"),
                            sep=';',
                            header=False,
                            index=False)

    elif mode == 'video':
        label_res = ''
        # Overrides the configured 'skip' for this branch; fr counts frames
        # and delay_ is a countdown before frames are collected.
        skip, fr, delay_ = 2, 0, 5
        data_frame = []
        data_map = DataLoader(path_vid, path_labels, path_test=path_test)
        #build model
        net = Resnet3DBuilder.build_resnet_50(inp_shape, nb_classes)
        # Weights are loaded unconditionally here (no "None" check).
        net.load_weights(path_weights)
        print('load model done')
        #open camera
        cap = cv2.VideoCapture(0)
        # Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file.
        #out = cv2.VideoWriter('gao_video.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 24, (int(cap.get(3)),int(cap.get(4))))
        # NOTE(review): 'ret' is never checked, here or inside the loop, so
        # a failed read would pass an invalid frame to cvtColor.
        ret, frame = cap.read()
        last_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        while (cap.isOpened()):
            #show video
            ret, frame = cap.read()
            #background subtraction (frame differencing) to detect idle periods
            gray = cv2.cvtColor(
                frame, cv2.COLOR_BGR2GRAY)  # convert color image to gray
            gray = cv2.GaussianBlur(gray, (5, 5), 0)
            diff = cv2.absdiff(gray, last_gray)  # frame difference!
            last_gray = gray
            thresh = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)[1]
            thresh = cv2.dilate(thresh, None, iterations=2)
            print('subtraction frame', np.mean(thresh))
            if img_show_camera:
                cv2.putText(frame, label_res, (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 2)
                cv2.imshow('frame', frame)
                cv2.imshow('diff', diff)
                cv2.imshow('thresh', thresh)
                # Pressing 'q' leaves the capture loop.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            # NOTE(review): unconditional continue -- the rest of the loop
            # body (buffering and prediction) is unreachable as written.
            continue
            #process image to push to data_frame to gesture regconition
            frameRGB = frame  # cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frameRGB = cv2.resize(frameRGB, (target_size[1], target_size[0]),
                                  interpolation=cv2.INTER_NEAREST)

            #get frame to data_frame
            fr += 1
            delay_ -= 1
            # Keeps the frame when fr is NOT a multiple of skip and the
            # delay countdown has gone below zero.
            if fr % skip and delay_ < 0: data_frame.append(frameRGB)

            # Once 16 frames are buffered, predict on them as one clip.
            if len(data_frame) == 16:
                t1 = time.time()
                data_frame = [data_frame]
                data_frame = np.array(data_frame) / 255
                res = net.predict(data_frame)
                prob = np.max(res)
                for i, item in enumerate(res):
                    item[item == np.max(item)] = 1
                    item[item != np.max(item)] = 0
                    label = data_map.categorical_to_label(item)
                    # Only update the displayed label at 0.6 or above.
                    if prob >= 0.6: label_res = label
                    print(label, prob)
                #reset data_frame
                delay_ = 5
                data_frame = []
                print('video/s:', 1 / (time.time() - t1))
        # When everything done, release the video capture and video write objects
        cap.release()
        #out.release()
        cv2.destroyAllWindows()

    else:
        # NOTE(review): the message lists only train/test although 'video'
        # is also handled above.
        sys.exit("<Error>: Use either {train,test} mode")

예제 #11

0

파일 보기

파일: test.py 프로젝트: sudnya/seq_to_seq_mt

from lib.data_loader import DataLoader

# Print the vocabulary statistics for the English vocab file.  A single
# pre-formatted argument gives the same "Vocabs <stats>" output under both
# the Python 2 print statement and the Python 3 print function.
print('Vocabs %s' % (DataLoader.get_vocab_stats('data/vocab.en.txt'),))

예제 #12

0

파일 보기

def main(args):
    """Train the RadhaKrishna network and save its weights and architecture.

    All settings are read from the module-level ``config`` object; ``args``
    is accepted but not used here.  The function builds training and
    validation generators, compiles the network with the Adam optimizer,
    optionally loads existing weights, trains with early stopping and a
    weights-only checkpoint callback, and finally writes:

    * weights to ``./checkpoints/radhakrishna``;
    * the architecture as JSON to ``<model_name>.json``;
    * weights to ``<model_name>.h5``.
    """
    # extract information from the configuration file
    nb_frames = config.getint('general', 'nb_frames')
    skip = config.getint('general', 'skip')
    target_size = literal_eval(config.get('general', 'target_size'))
    batch_size = config.getint('general', 'batch_size')
    epochs = config.getint('general', 'epochs')
    nb_classes = config.getint('general', 'nb_classes')

    model_name = config.get('path', 'model_name')
    data_root = config.get('path', 'data_root')
    data_model = config.get('path', 'data_model')
    data_vid = config.get('path', 'data_vid')

    path_weights = config.get('path', 'path_weights')

    csv_labels = config.get('path', 'csv_labels')
    csv_train = config.get('path', 'csv_train')
    csv_val = config.get('path', 'csv_val')

    workers = config.getint('option', 'workers')
    use_multiprocessing = config.getboolean('option', 'use_multiprocessing')
    max_queue_size = config.getint('option', 'max_queue_size')

    # join together the needed paths
    path_vid = os.path.join(data_root, data_vid)
    path_model = os.path.join(data_root, data_model, model_name)
    path_labels = os.path.join(data_root, csv_labels)
    path_train = os.path.join(data_root, csv_train)
    path_val = os.path.join(data_root, csv_val)

    # Input shape of the input Tensor: (frames, *target_size, 3)
    inp_shape = (nb_frames, ) + target_size + (3, )

    # load the data using DataLoader class
    data = DataLoader(path_vid, path_labels, path_train, path_val)

    # create model folder
    mkdirs(path_model, 0o755)

    # create the generators for the training and validation set
    gen = kmg.ImageDataGenerator()
    gen_train = gen.flow_video_from_dataframe(data.train_df,
                                              path_vid,
                                              path_classes=path_labels,
                                              x_col='video_id',
                                              y_col="label",
                                              target_size=target_size,
                                              batch_size=batch_size,
                                              nb_frames=nb_frames,
                                              skip=skip,
                                              has_ext=True)
    gen_val = gen.flow_video_from_dataframe(data.val_df,
                                            path_vid,
                                            path_classes=path_labels,
                                            x_col='video_id',
                                            y_col="label",
                                            target_size=target_size,
                                            batch_size=batch_size,
                                            nb_frames=nb_frames,
                                            skip=skip,
                                            has_ext=True)

    # MODEL

    # # Build and compile RESNET3D model
    # net = Resnet3DBuilder.build_resnet_101(inp_shape, nb_classes, drop_rate=0.5)
    # opti = SGD(lr=0.01, momentum=0.9, decay= 0.0001, nesterov=False)
    # net.compile(optimizer=opti,
    #             loss="categorical_crossentropy",
    #             metrics=["accuracy"])

    # Build and compile RadhaKrishna model
    net = model.RadhaKrishna(inp_shape=inp_shape, nb_classes=nb_classes)
    net.compile(optimizer="adam",
                loss="categorical_crossentropy",
                metrics=["accuracy", "top_k_categorical_accuracy"])

    # if model weights file is present
    # load the model weights
    if (path_weights != "None"):
        print("Loading weights from : " + path_weights)
        net.load_weights(path_weights)

    # Checkpoint location used by the weights-saving callback below.
    checkpoint_path = "training_1/cp.ckpt"

    # Stop training when the validation loss stops improving.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

    # Create a callback that saves the model's weights
    cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                  monitor='val_accuracy',
                                                  save_weights_only=True,
                                                  verbose=1)

    # get the number of samples in the training and validation set
    nb_sample_train = data.train_df["video_id"].size
    nb_sample_val = data.val_df["video_id"].size

    # launch the training
    net.fit_generator(
        generator=gen_train,
        steps_per_epoch=ceil(nb_sample_train / batch_size),
        epochs=epochs,
        validation_data=gen_val,
        validation_steps=ceil(nb_sample_val / batch_size),
        shuffle=True,
        verbose=1,
        workers=workers,
        max_queue_size=max_queue_size,
        use_multiprocessing=use_multiprocessing,
        callbacks=[cp_callback, es],
    )

    # Save the trained network's weights.  The original called
    # model.save_weights(...), but `model` is the name the network is built
    # from (model.RadhaKrishna above); the trained network is `net`.
    net.save_weights('./checkpoints/radhakrishna')
    # after training serialize the final model to JSON
    model_json = net.to_json()
    with open(model_name + ".json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    net.save_weights(model_name + ".h5")
    print("Saved model to disk")