def get_values(observation):
    return flatten(concat(
        observation['my_car'].values(),
        mapcat(methodcaller('values'),
               sorted(observation['other_cars'],
                      key=itemgetter('position_length'))),  # Sorted nearest-first. Should front and rear be separated too?
        mapcat(methodcaller('values'),
               sorted(observation['obstacles'],
                      key=itemgetter('position_length'))),
        mapcat(methodcaller('values'),
               sorted(observation['stars'],
                      key=itemgetter('position_length')))
    ))

def prepare_file_path(kwargs):
    """Determine file path from the first output name.

    Used in creating .dvc files.
    """
    from dvc.dvcfile import DVC_FILE, DVC_FILE_SUFFIX

    out = first(
        concat(
            kwargs.get("outs", []),
            kwargs.get("outs_no_cache", []),
            kwargs.get("metrics", []),
            kwargs.get("metrics_no_cache", []),
            kwargs.get("plots", []),
            kwargs.get("plots_no_cache", []),
            kwargs.get("outs_persist", []),
            kwargs.get("outs_persist_no_cache", []),
            kwargs.get("checkpoints", []),
            without([kwargs.get("live", None)], None),
        )
    )
    return (
        os.path.basename(os.path.normpath(out)) + DVC_FILE_SUFFIX
        if out
        else DVC_FILE
    )

def group_equal(iterable, equal=operator.eq):
    # `iterable` renamed from `iter`, which shadowed the builtin.
    groups = []
    for x in iterable:
        if not contains(list(concat(*groups)), x, equal=equal):
            groups.append([x])
        else:
            i = next(i for i, g in enumerate(groups) if contains(g, x, equal=equal))
            groups[i].append(x)
    return groups

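# A minimal usage sketch for group_equal. `contains` is not defined in this
# snippet; here we assume a simple helper that reports whether `x` matches
# any element of `seq` under the given equality predicate.
def contains(seq, x, equal=operator.eq):
    return any(equal(y, x) for y in seq)

# Group numbers that are equal modulo 3:
# group_equal([1, 4, 2, 7, 5], equal=lambda a, b: a % 3 == b % 3)
# -> [[1, 4, 7], [2, 5]]
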
def _random_position(self, sigma):
    return first(
        filter(
            lambda p: all(
                map(lambda b: (b.position - p).length >= 50,
                    concat(self.cars, self.obstacles, self.stars))),
            filter(
                lambda p: 100 < p.length < 950,
                repeatedly(lambda: pymunk.Vec2d(
                    self.game_random.gauss(0, sigma), 0).rotated(
                        self.game_random.uniform(0, pi * 2))))))

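# _random_position above is rejection sampling: `repeatedly` yields an
# infinite stream of candidate positions and `first(filter(...))` returns
# the first one that passes every constraint. A minimal self-contained
# sketch of the same pattern (the names here are illustrative, not from
# the original code):
import random
from funcy import first, repeatedly

def sample_until(predicate, generate):
    return first(filter(predicate, repeatedly(generate)))

# e.g. draw a point in the unit square that lies inside the unit circle:
# sample_until(lambda p: p[0] ** 2 + p[1] ** 2 < 1,
#              lambda: (random.random(), random.random()))
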
def main():
    (x_train, y_train), (x_validation, y_validation) = load_data()

    model = Model(*juxt(identity, computational_graph(y_train.shape[1]))(
        Input(shape=x_train.shape[1:])))
    # The paper says nesterov=True, but the reference code had False...
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(momentum=0.9),
                  metrics=['accuracy'])

    model.summary()
    # plot_model(model, to_file='./results/model.png')

    train_data = ImageDataGenerator(featurewise_center=True,
                                    featurewise_std_normalization=True,
                                    width_shift_range=0.125,
                                    height_shift_range=0.125,
                                    horizontal_flip=True)
    validation_data = ImageDataGenerator(featurewise_center=True,
                                         featurewise_std_normalization=True)
    # In practice we could not fit the featurewise statistics on
    # x_validation, so both generators are fit on x_train.
    for data in (train_data, validation_data):
        data.fit(x_train)

    batch_size = 128
    epoch_size = 200

    results = model.fit_generator(
        train_data.flow(x_train, y_train, batch_size=batch_size),
        steps_per_epoch=x_train.shape[0] // batch_size,
        epochs=epoch_size,
        callbacks=[
            LearningRateScheduler(
                partial(
                    getitem,
                    tuple(
                        take(
                            epoch_size,
                            concat(repeat(0.1, 60), repeat(0.02, 60),
                                   repeat(0.004, 40), repeat(0.0008))))))
        ],
        validation_data=validation_data.flow(x_validation, y_validation,
                                             batch_size=batch_size),
        validation_steps=x_validation.shape[0] // batch_size)

    with open('./results/history.pickle', 'wb') as f:
        pickle.dump(results.history, f)
    save_model(model, './results/model.h5')

    del model

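# The LearningRateScheduler callback above indexes into a precomputed tuple
# of per-epoch learning rates: funcy's repeat/concat/take build a stepwise
# schedule and partial(getitem, table) turns it into an epoch -> lr function.
# A minimal sketch of just that piece:
from operator import getitem
from functools import partial
from funcy import concat, repeat, take

lr_table = tuple(take(200, concat(repeat(0.1, 60), repeat(0.02, 60),
                                  repeat(0.004, 40), repeat(0.0008))))
lr_for_epoch = partial(getitem, lr_table)
# lr_for_epoch(0) == 0.1, lr_for_epoch(60) == 0.02, lr_for_epoch(160) == 0.0008
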
def get_interesting_repos(g: Github, session: Any) -> List[Repository]:
    repos: List[Repository] = []

    grepos = g.search_repositories(query='stars:>250 forks:>50',
                                   sort='stars', order='desc')
    records = zip(grepos, fy.repeat('most_stars'))

    grepos = g.search_repositories(query='forks:>5 topic:kaggle-competition',
                                   sort='stars', order='desc')
    records = fy.concat(records, zip(grepos, fy.repeat('kaggle')))

    grepos = g.search_repositories(query='forks:>5 topic:tensorflow-model',
                                   sort='stars', order='desc')
    records = fy.concat(records, zip(grepos, fy.repeat('tensorflow-model')))

    grepos = g.search_repositories(
        query='cookiecutterdatascience in:readme forks:>5 stars:>0 fork:true',
        sort='stars', order='desc')
    records = fy.concat(records,
                        zip(grepos, fy.repeat('cookiecutterdatascience')))

    for grepo, search_method in tqdm(records):
        repo = (session.query(Repository)
                .filter(Repository.id == grepo.full_name)
                .one_or_none())
        if repo is None:
            repo = Repository(
                id=grepo.full_name,
                owner=grepo.owner.login,
                name=grepo.name,
                description=grepo.description,
                search_method=search_method,
            )
        repos.append(repo)
    return repos

def _append_car(self, position, angle):
    car = Car(self.space)
    car.set_position_and_angle(position, angle)
    car.crash_energy = 0
    car.score = 0

    for shape in concat(
            car.shapes,
            mapcat(lambda tire: tire.shapes,
                   (car.tire_lf, car.tire_rf, car.tire_lr, car.tire_rr))):
        shape.collision_type = 1

    self.cars.append(car)

def _get_file_path(kwargs):
    from dvc.dvcfile import DVC_FILE_SUFFIX, DVC_FILE

    out = first(
        concat(
            kwargs.get("outs", []),
            kwargs.get("outs_no_cache", []),
            kwargs.get("metrics", []),
            kwargs.get("metrics_no_cache", []),
            kwargs.get("outs_persist", []),
            kwargs.get("outs_persist_no_cache", []),
        ))
    return (os.path.basename(os.path.normpath(out)) + DVC_FILE_SUFFIX
            if out else DVC_FILE)

def _create_observation(cls, game):
    def get_values(observation):
        return flatten(concat(
            observation['my_car'].values(),
            mapcat(methodcaller('values'),
                   sorted(observation['other_cars'],
                          key=itemgetter('position_length'))),  # Sorted nearest-first. Should front and rear be separated too?
            mapcat(methodcaller('values'),
                   sorted(observation['obstacles'],
                          key=itemgetter('position_length'))),
            mapcat(methodcaller('values'),
                   sorted(observation['stars'],
                          key=itemgetter('position_length')))
        ))

    observation = (
        np.array(tuple(get_values(game.create_observation(game.cars[0]))),
                 np.float32) /
        np.array(tuple(concat(
            (
                1000,             # my_car.position.x
                1000,             # my_car.position.y
                np.pi,            # my_car.angle
                np.pi,            # my_car.velocity_angle
                MAX_SPEED / FPS,  # my_car.velocity_length
                np.pi,            # my_car.steering_angle
                10,               # my_car.steering_torque
                30,               # my_car.score
                10 * FPS,         # my_car.crash_energy
            ),
            mapcat(lambda _: (
                np.pi,                # other_car.position_angle
                1000,                 # other_car.position_length
                np.pi,                # other_car.angle
                np.pi,                # other_car.velocity_angle
                MAX_SPEED / FPS * 2,  # other_car.velocity_length
                np.pi,                # other_car.steering_angle
                30,                   # other_car.score
                10 * FPS,             # other_car.crash_energy
            ), range(7)),
            mapcat(lambda _: (
                np.pi,  # obstacle.position_angle
                1000,   # obstacle.position_length
            ), range(OBSTACLE_COUNT)),
            mapcat(lambda _: (
                np.pi,  # star.position_angle
                1000,   # star.position_length
            ), range(STAR_COUNT)),
        )), dtype=np.float32)
    )

    # Clip the normalized observation to [-1, 1].
    observation[observation < -1] = -1
    observation[observation > 1] = 1

    return observation

def step(self):
    self.elapse += 1
    self.actions = []

    for car, player in zip(self.cars, concat(self.players, repeat(None))):
        # Get the action.
        acceleration, braking, steering = player.get_action(
            self.create_observation(car)) if player else (0, 0, 0)

        # Normalize the action.
        acceleration = self._clip(acceleration, -1, 1)
        braking = self._clip(braking, 0, 1)
        steering = self._clip(steering, -1, 1)

        # Record the normalized action.
        self.actions.append((acceleration, braking, steering))

        # A car damaged in a crash cannot act until repairs are finished.
        if car.crash_energy > 0:
            car.crash_energy = max(car.crash_energy - 100000, 0)
            continue

        # Add small Gaussian noise to the action to introduce variation.
        # A separate Random instance is used, because changing where the
        # next star appears would make reinforcement learning harder.
        acceleration = self._clip(
            acceleration + self.control_random.gauss(0, 0.05), -1, 1)
        braking = self._clip(braking + self.control_random.gauss(0, 0.05),
                             0, 1)
        steering = self._clip(steering + self.control_random.gauss(0, 0.05),
                              -1, 1)

        # Execute the action.
        car.accelerate(acceleration * 20000)
        car.brake(braking * 200000)
        car.steer(steering * 20000)

    self.space.step(1 / FPS)

    for star in filter(lambda star: star.is_catched, self.stars):
        self._reset_star_position(star)
        star.is_catched = False

    # The game ends after GAME_PERIOD_SEC.
    return self.elapse >= GAME_PERIOD_SEC * FPS

def _get_file_path(kwargs):
    from dvc.dvcfile import DVC_FILE, DVC_FILE_SUFFIX

    out = first(
        concat(
            kwargs.get("outs", []),
            kwargs.get("outs_no_cache", []),
            kwargs.get("metrics", []),
            kwargs.get("metrics_no_cache", []),
            kwargs.get("plots", []),
            kwargs.get("plots_no_cache", []),
            kwargs.get("outs_persist", []),
            kwargs.get("outs_persist_no_cache", []),
            kwargs.get("checkpoints", []),
            without([kwargs.get("live", None)], None),
        ))
    return (os.path.basename(os.path.normpath(out)) + DVC_FILE_SUFFIX
            if out else DVC_FILE)

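# A quick usage sketch for the _get_file_path/prepare_file_path helpers
# above. In DVC, DVC_FILE_SUFFIX is ".dvc" and DVC_FILE is the default stage
# file name, so the first declared output determines the generated file name.
# The kwargs below are made up for illustration:
#
#   _get_file_path({"outs": ["data/model.pkl"]})   # -> "model.pkl.dvc"
#   _get_file_path({"metrics": ["scores.json"]})   # -> "scores.json.dvc"
#   _get_file_path({})                             # -> DVC_FILE
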
def __init__(self):
    self._seed = None
    self.name = 'SelfDriving'

    self.action_space = gym.spaces.Box(
        np.array((-1, -1, -1), dtype=np.float32),
        np.array((1, 1, 1), dtype=np.float32),
        dtype=np.float32)
    self.observation_space = gym.spaces.Box(
        np.array(
            tuple(
                concat(
                    (
                        -1,  # my_car.position.x
                        -1,  # my_car.position.y
                        -1,  # my_car.angle
                        -1,  # my_car.velocity_angle
                        0,   # my_car.velocity_length
                        -1,  # my_car.steering_angle
                        -1,  # my_car.steering_torque
                        0,   # my_car.score
                        0,   # my_car.crash_energy
                    ),
                    mapcat(
                        lambda _: (
                            -1,  # other_car.position_angle
                            0,   # other_car.position_length
                            -1,  # other_car.angle
                            -1,  # other_car.velocity_angle
                            0,   # other_car.velocity_length
                            -1,  # other_car.steering_angle
                            0,   # other_car.score
                            0,   # other_car.crash_energy
                        ), range(7)),
                    mapcat(
                        lambda _: (
                            -1,  # obstacle.position_angle
                            0,   # obstacle.position_length
                        ), range(OBSTACLE_COUNT)),
                    mapcat(
                        lambda _: (
                            -1,  # star.position_angle
                            0,   # star.position_length
                        ), range(STAR_COUNT)),
                )),
            dtype=np.float32),
        np.array(
            tuple(
                concat(
                    (
                        1,  # my_car.position.x
                        1,  # my_car.position.y
                        1,  # my_car.angle
                        1,  # my_car.velocity_angle
                        1,  # my_car.velocity_length
                        1,  # my_car.steering_angle
                        1,  # my_car.steering_torque
                        1,  # my_car.score
                        1,  # my_car.crash_energy
                    ),
                    mapcat(
                        lambda _: (
                            1,  # other_car.position_angle
                            1,  # other_car.position_length
                            1,  # other_car.angle
                            1,  # other_car.velocity_angle
                            1,  # other_car.velocity_length
                            1,  # other_car.steering_angle
                            1,  # other_car.score
                            1,  # other_car.crash_energy
                        ), range(7)),
                    mapcat(
                        lambda _: (
                            1,  # obstacle.position_angle
                            1,  # obstacle.position_length
                        ), range(OBSTACLE_COUNT)),
                    mapcat(
                        lambda _: (
                            1,  # star.position_angle
                            1,  # star.position_length
                        ), range(STAR_COUNT)),
                )),
            dtype=np.float32),
        dtype=np.float32)

    self.screen = None
    self.reset()

def _process(
    self,
    named_cache,
    remote,
    jobs=None,
    show_checksums=False,
    download=False,
):
    logger.debug(
        "Preparing to {} '{}'".format(
            "download data from" if download else "upload data to",
            remote.path_info,
        )
    )

    if download:
        func = partial(
            remote.download,
            dir_mode=self._dir_mode,
            file_mode=self._file_mode,
        )
        status = STATUS_DELETED
        desc = "Downloading"
    else:
        func = remote.upload
        status = STATUS_NEW
        desc = "Uploading"

    if jobs is None:
        jobs = remote.JOBS

    dir_status, file_status, dir_contents = self._status(
        named_cache,
        remote,
        jobs=jobs,
        show_checksums=show_checksums,
        download=download,
    )

    dir_plans = self._get_plans(download, remote, dir_status, status)
    file_plans = self._get_plans(download, remote, file_status, status)

    total = len(dir_plans[0]) + len(file_plans[0])
    if total == 0:
        return 0

    with Tqdm(total=total, unit="file", desc=desc) as pbar:
        func = pbar.wrap_fn(func)
        with ThreadPoolExecutor(max_workers=jobs) as executor:
            if download:
                fails = sum(executor.map(func, *dir_plans))
                fails += sum(executor.map(func, *file_plans))
            else:
                # for uploads, push files first, and any .dir files last
                file_futures = {}
                for from_info, to_info, name, checksum in zip(*file_plans):
                    file_futures[checksum] = executor.submit(
                        func, from_info, to_info, name
                    )
                dir_futures = {}
                for from_info, to_info, name, dir_checksum in zip(
                    *dir_plans
                ):
                    wait_futures = {
                        future
                        for file_checksum, future in file_futures.items()
                        if file_checksum in dir_contents[dir_checksum]
                    }
                    dir_futures[dir_checksum] = executor.submit(
                        self._dir_upload,
                        func,
                        wait_futures,
                        from_info,
                        to_info,
                        name,
                    )
                fails = sum(
                    future.result()
                    for future in concat(
                        file_futures.values(), dir_futures.values()
                    )
                )

    if fails:
        if download:
            remote.index.clear()
            raise DownloadError(fails)
        raise UploadError(fails)

    if not download:
        # index successfully pushed dirs
        for dir_checksum, future in dir_futures.items():
            if future.result() == 0:
                file_checksums = dir_contents[dir_checksum]
                logger.debug(
                    "Indexing pushed dir '{}' with "
                    "'{}' nested files".format(
                        dir_checksum, len(file_checksums)
                    )
                )
                remote.index.update([dir_checksum], file_checksums)

    return len(dir_plans[0]) + len(file_plans[0])

def _process(
    self,
    named_cache,
    remote,
    jobs=None,
    show_checksums=False,
    download=False,
):
    logger.debug("Preparing to {} '{}'".format(
        "download data from" if download else "upload data to",
        remote.path_info,
    ))

    if download:
        func = partial(
            remote.download,
            dir_mode=self._dir_mode,
            file_mode=self._file_mode,
        )
        status = STATUS_DELETED
    else:
        func = remote.upload
        status = STATUS_NEW

    if jobs is None:
        jobs = remote.JOBS

    dir_status, file_status, dir_paths = self._status(
        named_cache,
        remote,
        jobs=jobs,
        show_checksums=show_checksums,
        download=download,
    )

    dir_plans = self._get_plans(download, remote, dir_status, status)
    file_plans = self._get_plans(download, remote, file_status, status)

    if len(dir_plans[0]) + len(file_plans[0]) == 0:
        return 0

    with ThreadPoolExecutor(max_workers=jobs) as executor:
        if download:
            fails = sum(executor.map(func, *dir_plans))
            fails += sum(executor.map(func, *file_plans))
        else:
            # for uploads, push files first, and any .dir files last
            file_futures = {}
            for from_info, to_info, name in zip(*file_plans):
                file_futures[to_info] = executor.submit(
                    func, from_info, to_info, name)
            dir_futures = {}
            for from_info, to_info, name in zip(*dir_plans):
                wait_futures = {
                    future
                    for file_path, future in file_futures.items()
                    if file_path in dir_paths[to_info]
                }
                dir_futures[to_info] = executor.submit(
                    self._dir_upload,
                    func,
                    wait_futures,
                    from_info,
                    to_info,
                    name,
                )
            fails = sum(future.result() for future in concat(
                file_futures.values(), dir_futures.values()))

    if fails:
        if download:
            raise DownloadError(fails)
        raise UploadError(fails)

    return len(dir_plans[0]) + len(file_plans[0])

def compute_output_shape(self, input_shape):
    return tuple(concat(butlast(input_shape), (self.output_channel_size,)))

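# A minimal sketch of what the shape computation above does: funcy's
# `butlast` drops the channel axis and `concat` appends the layer's own
# channel count. The shape and channel size here are illustrative.
from funcy import butlast, concat

input_shape = (None, 32, 32, 3)
output_channel_size = 64
tuple(concat(butlast(input_shape), (output_channel_size,)))
# -> (None, 32, 32, 64)
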
def run():
    batch_size = 32
    num_classes = 10
    epochs = 200

    with tf.device("/cpu:0"):
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    # Convert class vectors to binary class matrices.
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    x_train = x_train.astype('float32', copy=False)
    x_test = x_test.astype('float32', copy=False)
    x_train /= 255
    x_test /= 255

    optimizer = Adam(lr=0.001)

    model = SqueezeNet(classes=num_classes)
    squeezenet_model_file = './sqz_log/model.h5'
    if os.path.exists(squeezenet_model_file):
        model.layers.pop()
        model = Model(name="sqzn_no_softmax",
                      inputs=model.input,
                      outputs=model.layers[-1].output)
        model.load_weights(squeezenet_model_file, by_name=True)
    else:
        # train a new SqueezeNet
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['accuracy'])

        # train_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True,
        #                                 width_shift_range=0.125, height_shift_range=0.125, horizontal_flip=True)
        # validation_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
        train_data = ImageDataGenerator()
        validation_data = ImageDataGenerator()
        for data in (train_data, validation_data):
            data.fit(x_train)

        callbacks = [
            LearningRateScheduler(
                partial(
                    getitem,
                    tuple(
                        take(
                            epochs,
                            concat(repeat(0.01, 1), repeat(0.1, 99),
                                   repeat(0.01, 50), repeat(0.001)))))),
            ModelCheckpoint(filepath=squeezenet_model_file),
            TensorBoard(log_dir="./sqz_log", batch_size=batch_size)
        ]

        results = model.fit_generator(
            train_data.flow(x_train, y_train, batch_size=batch_size),
            steps_per_epoch=x_train.shape[0] // batch_size,
            epochs=epochs,
            callbacks=callbacks,
            validation_data=validation_data.flow(x_test, y_test,
                                                 batch_size=batch_size),
            validation_steps=x_test.shape[0] // batch_size)

        with open('./sqz_log/history.pickle', 'wb') as f:
            pickle.dump(results.history, f)
        save_model(model, squeezenet_model_file)

    # Build the siamese architecture
    # model_cut = Model(name="sqzn_no_softmax", inputs=model.input, outputs=model.layers[-1].output)
    # model_cut.load_weights(squeezenet_model_file, by_name=True)
    # with tf.device("/cpu:0"):
    #     model_cut.summary()
    input_shape = x_train.shape[1:]
    im_in1 = Input(shape=input_shape)
    im_in2 = Input(shape=input_shape)
    feat_x1 = model(im_in1)
    feat_x2 = model(im_in2)
    lambda_merge = Lambda(euclidean_distance,
                          output_shape=(1,))([feat_x1, feat_x2])

    siamese = Model(name="siamese", inputs=[im_in1, im_in2],
                    outputs=lambda_merge)
    with tf.device("/cpu:0"):
        siamese.summary()

    optimizer = RMSprop()  # SGD(momentum=0.9)
    siamese.compile(optimizer=optimizer, loss=contrastive_loss,
                    metrics=[accuracy])

    def make_img_pair(identical, from_train):
        """Select the image pairs"""
        label = np.random.randint(0, num_classes)
        if identical:
            if from_train:
                idx = np.nonzero(y_train[:, label] == 1)[0]
            else:
                idx = np.nonzero(y_test[:, label] == 1)[0]
            # pick any two indexes randomly
            id1 = np.random.randint(0, idx.shape[0])
            id2 = np.random.randint(0, idx.shape[0])
            while id1 == id2:
                id2 = np.random.randint(0, idx.shape[0])
            idx1 = idx2 = idx
        else:
            if from_train:
                idx1 = np.nonzero(y_train[:, label] == 1)[0]
                idx2 = np.nonzero(y_train[:, (label + 1) % num_classes] == 1)[0]
            else:
                idx1 = np.nonzero(y_test[:, label] == 1)[0]
                # bug fix: the original used y_train here in the test branch
                idx2 = np.nonzero(y_test[:, (label + 1) % num_classes] == 1)[0]
            # pick any two indexes randomly
            id1 = np.random.randint(0, idx1.shape[0])
            id2 = np.random.randint(0, idx2.shape[0])
        # bug fix: map positions back to dataset indexes; the original
        # indexed the dataset with positions into idx1/idx2, ignoring labels
        if from_train:
            return np.array([x_train[idx1[id1]], x_train[idx2[id2]]])
        return np.array([x_test[idx1[id1]], x_test[idx2[id2]]])

    def generator(from_train):
        while True:
            X = [[None, None]] * batch_size
            y = [[None]] * batch_size
            indexes = np.arange(batch_size)
            identical = True
            for i in indexes:
                X[i] = make_img_pair(identical, from_train)
                y[i] = [1 if identical else 0]
                identical = not identical
            np.random.shuffle(indexes)
            X = np.asarray(X)[indexes]
            y = np.asarray(y)[indexes]
            # print("generator: from_train:", from_train, " - X:", X.shape, "- y:", y.shape)
            yield [X[:, 0], X[:, 1]], y

    siamese_model_file = "./siam_log/siamese.h5"
    epochs = 100
    callbacks = [
        LearningRateScheduler(
            partial(
                getitem,
                tuple(
                    take(
                        epochs,
                        concat(repeat(0.01, 1), repeat(0.1, 99),
                               repeat(0.01, 50), repeat(0.001)))))),
        ModelCheckpoint(filepath=siamese_model_file),
        TensorBoard(log_dir="./siam_log", batch_size=batch_size)
    ]

    outputs = siamese.fit_generator(
        generator(from_train=True),
        initial_epoch=0,
        steps_per_epoch=x_train.shape[0] // batch_size,
        epochs=epochs,
        validation_data=generator(from_train=False),
        validation_steps=x_test.shape[0] // batch_size,
        callbacks=callbacks)

    with open('./siam_log/history.pickle', 'wb') as f:
        pickle.dump(outputs.history, f)
    save_model(siamese, siamese_model_file)

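# euclidean_distance, contrastive_loss, and accuracy are referenced above but
# not defined in this snippet. A minimal sketch of common definitions (after
# Hadsell et al., as in the Keras siamese examples); treat these as
# assumptions, not the original author's code:
from keras import backend as K

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),
                            K.epsilon()))

def contrastive_loss(y_true, y_pred, margin=1.0):
    # y_true == 1 for identical pairs, 0 for different pairs
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

def accuracy(y_true, y_pred):
    # classify a pair as identical when the distance is below 0.5
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
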
def main():
    import os

    with tf.device("/cpu:0"):
        (x_train, y_train), (x_validation, y_validation) = load_data()

    batch_size = 32
    epochs = 200
    input_shape = Input(shape=x_train.shape[1:])

    model_file = './results/model.h5'
    if os.path.exists(model_file):
        model = load_model(model_file)
        # with tf.device("/cpu:0"):
        #     validation_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
    else:
        model = Model(*juxt(identity, computational_graph(y_train.shape[1]))(
            input_shape))
        model.compile(loss='categorical_crossentropy',
                      optimizer=SGD(momentum=0.9),
                      metrics=['accuracy'])

        with tf.device("/cpu:0"):
            train_data = ImageDataGenerator(featurewise_center=True,
                                            featurewise_std_normalization=True,
                                            width_shift_range=0.125,
                                            height_shift_range=0.125,
                                            horizontal_flip=True)
            validation_data = ImageDataGenerator(
                featurewise_center=True, featurewise_std_normalization=True)
            # In practice we could not fit the featurewise statistics on
            # x_validation, so both generators are fit on x_train.
            for data in (train_data, validation_data):
                data.fit(x_train)

        results = model.fit_generator(
            train_data.flow(x_train, y_train, batch_size=batch_size),
            steps_per_epoch=x_train.shape[0] // batch_size,
            epochs=epochs,
            callbacks=[
                LearningRateScheduler(
                    partial(
                        getitem,
                        tuple(
                            take(
                                epochs,
                                concat(repeat(0.01, 1), repeat(0.1, 99),
                                       repeat(0.01, 50), repeat(0.001))))))
            ],
            validation_data=validation_data.flow(x_validation, y_validation,
                                                 batch_size=batch_size),
            validation_steps=x_validation.shape[0] // batch_size)

        with open('./results/history.pickle', 'wb') as f:
            pickle.dump(results.history, f)
        save_model(model, model_file)

    try:
        with tf.device("/cpu:0"):
            # model.summary()
            # print("=== AFTER POPPING THE LAST ===")
            model.layers.pop()
            # model.summary()
            # generate_confusion_matrix(model, x_validation, y_validation, batch_size)
            # plot_model(model, to_file='./results/model.png')
    except Exception as ex:
        print("plot_model failed with error:", repr(ex), "\nMoving on...")

    siamese(input_shape, model)

def _process(
    self,
    named_cache,
    remote,
    jobs=None,
    show_checksums=False,
    download=False,
):
    logger.debug("Preparing to {} '{}'".format(
        "download data from" if download else "upload data to",
        remote.tree.path_info,
    ))

    if download:
        func = partial(
            _log_exceptions(remote.tree.download, "download"),
            dir_mode=self.tree.dir_mode,
            file_mode=self.tree.file_mode,
        )
        status = STATUS_DELETED
        desc = "Downloading"
    else:
        func = _log_exceptions(remote.tree.upload, "upload")
        status = STATUS_NEW
        desc = "Uploading"

    if jobs is None:
        jobs = remote.tree.JOBS

    dir_status, file_status, dir_contents = self._status(
        named_cache,
        remote,
        jobs=jobs,
        show_checksums=show_checksums,
        download=download,
    )

    dir_plans, _ = self._get_plans(download, remote, dir_status, status)
    file_plans, missing_files = self._get_plans(download, remote,
                                                file_status, status)

    total = len(dir_plans[0]) + len(file_plans[0])
    if total == 0:
        return 0

    with Tqdm(total=total, unit="file", desc=desc) as pbar:
        func = pbar.wrap_fn(func)
        with ThreadPoolExecutor(max_workers=jobs) as executor:
            if download:
                from_infos, to_infos, names, _ = (
                    d + f for d, f in zip(dir_plans, file_plans))
                fails = sum(executor.map(func, from_infos, to_infos, names))
            else:
                # for uploads, push files first, and any .dir files last
                file_futures = {}
                for from_info, to_info, name, hash_ in zip(*file_plans):
                    file_futures[hash_] = executor.submit(
                        func, from_info, to_info, name)
                dir_futures = {}
                for from_info, to_info, name, dir_hash in zip(*dir_plans):
                    # if for some reason a file contained in this dir is
                    # missing both locally and in the remote, we want to
                    # push whatever file content we have, but should not
                    # push .dir file
                    for file_hash in missing_files:
                        if file_hash in dir_contents[dir_hash]:
                            logger.debug(
                                "directory '%s' contains missing files, "
                                "skipping .dir file upload",
                                name,
                            )
                            break
                    else:
                        wait_futures = {
                            future
                            for file_hash, future in file_futures.items()
                            if file_hash in dir_contents[dir_hash]
                        }
                        dir_futures[dir_hash] = executor.submit(
                            self._dir_upload,
                            func,
                            wait_futures,
                            from_info,
                            to_info,
                            name,
                        )
                fails = sum(future.result() for future in concat(
                    file_futures.values(), dir_futures.values()))

    if fails:
        if download:
            remote.index.clear()
            raise DownloadError(fails)
        raise UploadError(fails)

    if not download:
        # index successfully pushed dirs
        for dir_hash, future in dir_futures.items():
            if future.result() == 0:
                file_hashes = dir_contents[dir_hash]
                logger.debug("Indexing pushed dir '{}' with "
                             "'{}' nested files".format(
                                 dir_hash, len(file_hashes)))
                remote.index.update([dir_hash], file_hashes)

    return len(dir_plans[0]) + len(file_plans[0])

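# `_dir_upload` is not shown in this snippet. The upload path above submits
# one future per file and one per .dir manifest, handing each .dir future
# the set of futures for the files it contains. A plausible minimal shape,
# assuming _dir_upload just waits for those files before uploading the
# manifest (a sketch, not the verbatim DVC implementation):
from concurrent import futures

def _dir_upload(func, wait_futures, from_info, to_info, name):
    futures.wait(wait_futures)
    if any(future.result() for future in wait_futures):
        # some file upload failed; skip pushing the .dir manifest so the
        # remote never claims a complete directory it doesn't have
        return 1
    return func(from_info, to_info, name)
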
def export_twitterUser_emotion_analysis(db='UserPost', collection="user_post"):
    client = MongoClient()
    db_tweets = client[db]
    collect_tweets = db_tweets[collection]
    db_user = client['Twitter']
    collect_user = db_user['twitter']

    from funcy import flatten, concat, group_by

    # Extract the users behind the existing posts.
    pipline = [
        {"$match": {"site": "twitter"}},
        {"$group": {"_id": "$user.id_str", "count": {"$sum": 1}}}
    ]
    result = list(collect_tweets.aggregate(pipline))

    formatDocs = []
    for id in list(map(lambda x: x['_id'], result)):
        # Look up this user's profile.
        user_for_id = collect_user.find_one({'id_str': id})
        # Count all posts by this user.
        user_for_id_tweets_count = collect_tweets.count(
            {"user.id_str": id, "site": 'twitter'})
        if user_for_id_tweets_count > 0:
            # Collect the texts of all posts by this user.
            aggregate_for_user_tweets = collect_tweets.aggregate([
                {"$match": {"user.id_str": id, "site": 'twitter'}},
                {"$group": {"_id": "$user.id_str",
                            "text": {"$push": "$text"}}}
            ])
            user_tweets_texts = list(aggregate_for_user_tweets)[0]
            # Batch the texts into chunks of 300 for the tone analyzer.
            if len(user_tweets_texts['text']) > 300:
                ops = [{'url': 'https://tone-analyzer-demo.ng.bluemix.net/api/tone',
                        'data': ''.join(user_tweets_texts['text'][i:i + 300])}
                       for i in range(0, len(user_tweets_texts['text']), 300)]
            else:
                texts = ''.join(user_tweets_texts['text'])
                ops = [{'url': 'https://tone-analyzer-demo.ng.bluemix.net/api/tone',
                        'data': texts}]
            analyzer = asynchronous_request_facebook_api(ops)
            final_result = list(concat(list(flatten(list(map(
                lambda x: x['document_tone']['tones'], analyzer))))))
            group_result = group_by(lambda x: x['tone_name'], final_result)
            # NOTE: group_result is computed but never written to formatDocs;
            # an empty dict is appended here, as in the original.
            formatDocs.append({})
            print(len(formatDocs))
        else:
            print(id)

    df2 = pd.DataFrame(formatDocs)
    df2 = df2.applymap(lambda x: x.encode('unicode_escape').decode('utf-8')
                       if isinstance(x, str) else x)
    df2.to_excel('./export_data/%s/user_summary/%s.xlsx'
                 % ("twitter", "twitter_user_summary"),
                 sheet_name='Sheet1')

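# funcy's group_by, used above, buckets a sequence by a key function and
# returns a defaultdict of lists. A minimal sketch with made-up tone records:
from funcy import group_by

tones = [{'tone_name': 'Joy', 'score': 0.7},
         {'tone_name': 'Anger', 'score': 0.2},
         {'tone_name': 'Joy', 'score': 0.5}]
group_by(lambda x: x['tone_name'], tones)
# -> {'Joy': [{'tone_name': 'Joy', 'score': 0.7},
#             {'tone_name': 'Joy', 'score': 0.5}],
#     'Anger': [{'tone_name': 'Anger', 'score': 0.2}]}
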
def main():
    #
    # CIFAR-10
    #
    # x_train.shape      = (50000, 32, 32, 3)
    # y_train.shape      = (50000, 10)
    # x_validation.shape = (10000, 32, 32, 3)
    # y_validation.shape = (10000, 10)
    #
    cifar = CIFAR_10()
    data = cifar.load_data()

    x_train = data['training_data']
    y_train = data['training_label']
    x_validation = data['validation_data']
    y_validation = data['validation_label']
    print("x_train.shape=", x_train.shape)
    print("y_train.shape=", y_train.shape)
    print("x_validation.shape=", x_validation.shape)
    print("y_validation.shape=", y_validation.shape)

    #
    # SqueezeNet
    #
    squeeze = SqueezeNet()
    i = Input(shape=x_train.shape[1:])
    o = squeeze.make_graph(y_train.shape[1])(i)

    #
    # model
    #
    model = Model(inputs=i, outputs=o)

    #
    # compile model
    #
    model.compile(
        loss='categorical_crossentropy',
        optimizer=SGD(momentum=0.9),
        metrics=['accuracy']
    )

    #
    # generator in ImageDataGenerator by keras
    #
    train_data = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
        width_shift_range=0.125,
        height_shift_range=0.125,
        horizontal_flip=True
    )
    validation_data = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True
    )
    # In practice we could not fit the featurewise statistics on
    # x_validation, so both generators are fit on x_train.
    for data in (train_data, validation_data):
        data.fit(x_train)

    #
    # check pickle
    #
    # file_pickle = "./results/history.pickle"
    model_path = "./results"
    model_file = model_path + "/model.h5"
    model_weights = model_path + "/weights.h5"
    print(f"models: model={model_file}, weight={model_weights}")
    # print(f"models: arch =", options['file_arch'])
    # print(f"models: weight=", options['model_weights'])
    if not path.exists(model_path):
        os.mkdir(model_path)

    #
    # print model
    #
    from lib_utils import print_model_summary
    print_model_summary(model, "./results/network.txt", "model.png")

    #
    # check model; if no trained model exists, train one and save it
    #
    if not path.exists(model_file):
        #
        # fit generator
        #
        batch_size = 1000  # 100
        epochs = 1  # 200
        results = model.fit_generator(
            # train data (ImageDataGenerator by keras)
            train_data.flow(x_train, y_train, batch_size=batch_size),
            # steps per epoch
            steps_per_epoch=x_train.shape[0] // batch_size,
            # epochs
            epochs=epochs,
            # callbacks
            callbacks=[
                LearningRateScheduler(
                    partial(
                        getitem,
                        tuple(take(epochs,
                                   concat(repeat(0.010, 1),
                                          repeat(0.100, 99),
                                          repeat(0.010, 50),
                                          repeat(0.001))))))
            ],
            # validation data (ImageDataGenerator by keras)
            validation_data=validation_data.flow(x_validation, y_validation,
                                                 batch_size=batch_size),
            # validation steps
            validation_steps=x_validation.shape[0] // batch_size,
            # max_queue_size
            max_queue_size=4
        )

        #
        # save keras model
        #
        from lib_utils import save_model_by_keras
        save_model_by_keras(model, model_file, model_weights)
        # del model
    else:
        #
        # load keras model
        #
        if path.exists(model_file):
            print("load model...")
            from lib_utils import load_model_by_keras
            model = load_model_by_keras(model_file, model_weights)
            print("load model...done")
        else:
            print("load model...: not found=", model_file, model_weights)

    #
    # check version
    #
    from lib_utils import get_version
    get_version(model_file)

    #
    # evaluate
    #
    """
    print("model evaluate...")
    score = lmodel.evaluate(x_validation, y_validation, verbose=1)
    print("model evaluate: loss=", score[0])
    print("model evaluate: accuracy=", score[1])
    """

    #
    # prediction
    #
    print("model prediction...")
    # lmodel.predict(y_validation.shape[1])
    # lmodel.predict(x_train.shape[1:])
    print("x_validation.shape=", x_validation.shape)
    print("x_validation.shape[0]=", x_validation.shape[0])
    print("x_validation.shape[1]=", x_validation.shape[1])
    print("x_validation.shape[2]=", x_validation.shape[2])
    print("x_validation.shape[3]=", x_validation.shape[3])

    i0 = x_validation[0:1]
    i1 = x_validation.reshape(10000, 32, 32, 3)
    i2 = i1[0]
    print("i0.shape=", i0.shape)
    print("i1.shape=", i1.shape)
    print("i2.shape=", i2.shape)
    # lmodel.predict(i0, verbose=1)

    predo = model.predict(x_validation, verbose=1)[0]
    print(predo)

    preds = model.predict(x_validation, verbose=1)
    # for pre in preds:
    #     y = pre.argmax()
    #     print("label: ", y_validation[y])

    print('done')