def get_tweets(self):
    api = Api.twitter_api()
    tweets_ = []
    created = []
    hashtag = []
    for search_word in self.search_words:
        new_search = search_word + " -filter:retweets"  # Filter retweets
        tweets = tw.Cursor(api.search, q=new_search,
                           lang='en').items(self.n_tweets)
        for tweet in tweets:
            tweets_.append(tweet.text)  # Get tweets
            created.append(tweet.created_at)  # Get timestamp
            hashtag.append(search_word)
    dataset = pd.DataFrame({
        "hashtag": hashtag,
        "created_at": created,
        "tweet": tweets_
    })
    store_data = DataHandler('twitter', self.search_words)
    store_data.store_network_dataset(dataset)
def vader_sentiment(self):
    handler = DataHandler(self.social_network, self.search_word)
    df_network = handler.read_network_dataset()
    df = df_network[df_network.tweet != '']
    prepross = Processing(self.social_network, self.search_word)
    analyzer = SentimentIntensityAnalyzer()
    predict_df = pd.DataFrame(
        None, columns=['date', 'hashtag', 'tweet', 'clean_tweet', 'sentiment'])
    for i, row in df.iterrows():
        clean_tweet = prepross.clean_text(row['tweet'])
        sentiment = analyzer.polarity_scores(clean_tweet)['compound']
        predict_df.loc[i] = [
            row['created_at'], row['hashtag'], row['tweet'], clean_tweet,
            sentiment
        ]
    predict_df.to_csv(r'data/output/dataset_predict.csv', sep=';', index=None)
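# Illustration only, not part of the pipeline above: the pipeline stores the
# raw VADER compound score, but it can be mapped to a discrete label with the
# thresholds commonly suggested for VADER (+/-0.05). Assumes the
# vaderSentiment package is the source of SentimentIntensityAnalyzer.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()
score = analyzer.polarity_scores("what a great day")['compound']
if score >= 0.05:
    label = 'positive'
elif score <= -0.05:
    label = 'negative'
else:
    label = 'neutral'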
def load_order_csv(self, path, columns=None):
    # Load order csv data
    data = self.load_file(path, columns)
    # Parse columns
    dh = DataHandler()
    data = dh.format_csv_actions(data)
    dh.format_csv_dates(data)
    return data
def train_sift(self, X_list):
    bow_list = []
    for X in X_list:
        bow_list.append(self.compute(X))
    self.bow_matrix = reduce(np.vstack, bow_list)
    dh = DataHandler()
    dh.load()
    sample_y = np.empty((len(X_list), 1))
    for i in range(len(sample_y)):
        sample_y[i][0] = dh.get_lables(id=i)
    # np.hstack expects a single sequence of arrays, so pass labels and BoW
    # matrix as a tuple
    sample_data = np.hstack((sample_y, self.bow_matrix))
    # Save sample data
    np.savetxt(os.path.join(self.bow_path, 'bow_sift.txt'), sample_data)
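# Illustration only: a tiny example of the hstack call used above, showing
# how a label column and a feature matrix are combined into one sample array.
import numpy as np

labels = np.array([[0.0], [1.0]])        # shape (2, 1)
features = np.array([[1, 2], [3, 4]])    # shape (2, 2)
combined = np.hstack((labels, features)) # shape (2, 3): label first, then BoW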
def bug_count(self):
    data_handler = DataHandler(data_path=root.joinpath('data'))
    files = data_handler.get_data()
    all_results = dict()
    for proj, data in files.items():
        col_name = ['Date', 'Actual', 'ARIMA', 'NAIVE']
        results = []
        actual = 0
        for train, test in self.moving_window(data, frame=24):
            try:
                p, d, q = 4, 1, 4
                # if not self.is_stationary(train):
                #     train = self.detrend_series(train)
                arima = ARIMA(train, order=(p, d, q), freq='W-MON')
                arima_fit = arima.fit(disp=0)
                # Find start and end time stamps
                start, end = test.index[0], test.index[-1]
                # Save date, actual, and forecast
                prev_actual = actual
                actual = test.values.ravel()[0]
                forecast_arima = int(abs(arima_fit.forecast()[0]))
                forecast_naive = prev_actual
                date = test.index.strftime("%Y-%m-%d").values[0]
                results.append(
                    [date, actual, forecast_arima, forecast_naive])
            except Exception:
                # Fall back to a linear trend forecast when ARIMA fails to fit
                X = np.arange(len(train.values) + 1)
                X = np.reshape(X, (len(X), 1))
                y = train.values
                model = LinearRegression()
                model.fit(X[:-1], y)
                prev_actual = actual
                actual = test.values.ravel()[0]
                forecast_arima = int(
                    abs(model.predict(X[-1].reshape(1, -1))[0]))
                forecast_naive = prev_actual
                date = test.index.strftime("%Y-%m-%d").values[0]
                results.append(
                    [date, actual, forecast_arima, forecast_naive])
        results = pd.DataFrame(results, columns=col_name).set_index('Date')
        results.to_csv(root.joinpath('results', proj + ".csv"))
        all_results[proj] = results
    return all_results
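# A minimal sketch of a sliding-window splitter in the spirit of the
# moving_window() helper used above. The real helper lives elsewhere in the
# project; this version is an assumption for illustration only. It walks a
# time-indexed pandas Series and yields (train, test) pairs where the test
# window is the single observation immediately after the training frame.
def moving_window_sketch(series, frame=24):
    for start in range(len(series) - frame):
        train = series.iloc[start:start + frame]
        test = series.iloc[start + frame:start + frame + 1]
        yield train, test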
def get_character_mentions(name):
    """
    Look up all the sentences in which a character has been mentioned. If the
    name does not match any character, return the sentences for every main
    character instead.

    Parameters:
        name str: The name of a character, may be None

    Returns:
        A list of sentences for the given character, or a mapping from each
        main character to its sentences
    """
    logger.info(
        f'CALLED FROM: {GET_CHARACTER_MENTIONS} ENDPOINT: Displaying the sentences for a character or the sentences for all main characters.'
    )
    data = DataHandler().get_data()
    if not data:
        abort(404, description=MISSING_DATA_STR)
    elif not name:
        abort(404, description='No name provided.')
    elif name in data:
        res = list(data[name][SENT].keys())
    else:
        main_characters_names = list(
            filter(lambda x: data[x].get(RANK) == 1, data))
        res = {}
        for main_name in main_characters_names:
            res[main_name] = list(data[main_name][SENT].keys())
    return jsonify(res)
def get_characters_co_mentions(name_a, name_b):
    """
    Display all the sentences in which both entities appear, else an error

    Parameters:
        name_a str: The name of the first character from the book
        name_b str: The name of the second character from the book

    Returns:
        The list of sentences or 404
    """
    logger.info(
        f'CALLED FROM: {GET_CHARACTER_CO_MENTIONS} ENDPOINT: Displaying the sentences in which we have both character_a and character_b.'
    )
    data = DataHandler().get_data()
    if not data:
        # Log before aborting, otherwise this message is never emitted
        logger.info(
            f'CALLED FROM: {GET_CHARACTER_CO_MENTIONS} ENDPOINT: No data in the collection.'
        )
        abort(404, description=MISSING_DATA_STR)
    if not all((name_a, name_b)):
        abort(404, description='Missing name argument.')
    sents = set()
    if name_a in data and name_b in data:
        for sent in data[name_a][SENT]:
            if name_b in sent:
                sents.add(sent)
        for sent in data[name_b][SENT]:
            if name_a in sent:
                sents.add(sent)
    return jsonify(list(sents))
def get_character_info(name):
    """
    Perform a lookup in the data collection for a specific name. If there is
    a match, return its schema; otherwise return 404 with an error.

    Parameters:
        name str: The name of the named entity

    Returns:
        The generated schema
    """
    logger.info(
        f'CALLED FROM: {GET_CHARACTER_INFO} ENDPOINT: Performing a lookup on an entity.'
    )
    data = DataHandler().get_data()
    if not data:
        abort(404, description=MISSING_DATA_STR)
    elif not name:
        abort(404, description='No name provided.')
    elif name not in data:
        abort(404, description=f"The name: {name} is not in the collection")
    res_entity = extract_entity(data[name])
    return jsonify(res_entity)
def pre_processing(self):
    handler = DataHandler(self.social_network, self.search_word)
    df_network = handler.read_network_dataset()
    df = df_network[df_network.tweets != '']
    nlp = spacy.load('pt_core_news_sm')
    # nlp = spacy.load('en_core_web_sm')
    nltk.download("stopwords")
    nltk.download('punkt')
    stop_words_ = STOP_WORDS.union(stopwords.words('english'))
    stop_words = [unidecode(stop).lower() for stop in stop_words_]
    nltk.download('rslp')
    # Get all dataset words
    all_words, all_words_n_gram = Processing.words_dataset(
        df['tweets'], stop_words, nlp)
    bag_words = []
    bag_words_n_gram = []
    n_gram = []
    clean_tweets = []
    for sentence in df['tweets']:
        clean = Processing.clean_text(sentence, stop_words)
        token = Processing.lemma(clean.split(), nlp)
        concat = ' '.join(token)
        ngram = Processing.n_gram(concat)
        n_gram.append(ngram)
        bag_words_n_gram.append(
            Processing.bag_of_words(ngram, all_words_n_gram))
        bag_words.append(Processing.bag_of_words(concat.split(), all_words))
        clean_tweets.append(concat)
    Processing.word_cloud(clean_tweets)
    dataset = pd.DataFrame({
        "Posts": clean_tweets,
        "BOW": bag_words,
        "N-gram": n_gram,
        "BOW-N": bag_words_n_gram
    })
    handler.store_processed_dataset(dataset)
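# Illustration only, independent of the Processing helpers above: a
# bag-of-words vector simply records, for every term in the corpus
# vocabulary, how often it occurs in one cleaned post. The helper name and
# the count-based encoding are assumptions for this sketch, not the project's
# actual Processing.bag_of_words implementation.
def bag_of_words_sketch(tokens, vocabulary):
    # One entry per vocabulary term, counting its occurrences in the post
    return [tokens.count(word) for word in vocabulary]

vocab = ['bom', 'dia', 'ruim']
print(bag_of_words_sketch(['bom', 'dia', 'bom'], vocab))  # [2, 1, 0]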
def run_evaluation(cfg):
    """Evaluate a Tensorflow model"""
    logger = logging.getLogger(run_evaluation.__name__)
    data, ground_truth = DataHandler(cfg.data, cfg.eval_n_elements,
                                     shuffle=False).next_batch()
    prediction = np.zeros(ground_truth.shape)
    samples = prediction.shape[0]
    tictoc = np.zeros(samples)
    print_every = 2500
    logger.info('Load Tensorflow model')
    with TensorflowWrapper(os.path.dirname(cfg.model),
                           os.path.basename(cfg.model),
                           cfg.use_snapshots) as model:
        logger.info('Start model evaluation')
        for i in range(samples):
            start_time = time.time()
            prediction[i, :] = model.inference(data[i, :])
            tictoc[i % print_every] = time.time() - start_time
            next_i = i + 1
            if next_i > 0 and next_i % print_every == 0 or next_i == samples:
                logger.info(
                    'Processed: {:6.2f}% ({:5d}/{}) ({:.{width}f} ms/sample)'.
                    format(next_i / samples * 100.0, next_i, samples,
                           np.mean(tictoc) * 1e3,
                           width=math.ceil(math.log10(samples))))
    # Compute the loss
    error = np.mean(np.abs(ground_truth - prediction), axis=0)
    combined_error = np.mean(error)
    logger.info('Evaluation error: {:.5f} ({:.5f}/{:.5f})'.format(
        combined_error, error[0], error[1]))
    # Write the results into a .csv file
    if cfg.write_result:
        add_header_line = not os.path.isfile(cfg.results)
        with open(cfg.results, 'a') as capture:
            if add_header_line:
                capture.write(
                    'date_time,model,error_combined,error_linear,error_angular,execution_time\n'
                )
            capture.write('{},{},{},{},{},{}\n'.format(
                time.strftime('%Y-%m-%d %H:%M:%S'), cfg.model,
                combined_error, error[0], error[1], np.mean(tictoc)))
class TestDataHandler(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestDataHandler, self).__init__(*args, **kwargs)
        self.dh = DataHandler(data_path=root.joinpath("data"))

    def test_get_data(self):
        all_data = self.dh.get_data()
        self.assertIsInstance(all_data, dict)
        for proj, datasets in all_data.items():
            self.assertIsInstance(proj, str)
            self.assertIsInstance(datasets, dict)
            for key, value in datasets.items():
                self.assertIsInstance(key, str)
                self.assertIsInstance(value, pd.core.frame.DataFrame)
def get_main_characters():
    """
    Get all the entities from the data collection that have rank = 1

    Returns:
        The generated schema for all the main entities
    """
    logger.info(
        f'CALLED FROM: {GET_MAIN_CHARACTERS} ENDPOINT: Displaying all the main characters.'
    )
    data = DataHandler().get_data()
    if not data:
        abort(404, description=MISSING_DATA_STR)
    main_characters_names = list(
        filter(lambda x: data[x].get('rank') == 1, data))
    main_characters = get_entities(main_characters_names, data)
    return jsonify(create_response_schema(main_characters))
def get_support_characters():
    """
    Get all the entities from the data collection that have rank = 2

    Returns:
        The generated schema for all the support entities
    """
    logger.info(
        f'CALLED FROM: {GET_SUPPORT_CHARACTES} ENDPOINT: Displaying all the support characters.'
    )
    data = DataHandler().get_data()
    if not data:
        abort(404, description=MISSING_DATA_STR)
    secondary = list(filter(lambda x: data[x].get(RANK) == 2, data))
    secondary_characters = get_entities(secondary, data)
    return jsonify(create_response_schema(secondary_characters))
def get_episode_characters():
    """
    Get 10 random entities from the data collection that have rank = 3

    Returns:
        The generated schema for the selected episode entities
    """
    logger.info(
        f'CALLED FROM: {GET_EPISODE_CHARACTERS} ENDPOINT: Displaying the episode characters.'
    )
    data = DataHandler().get_data()
    if not data:
        abort(404, description=MISSING_DATA_STR)
    episode_names = list(filter(lambda x: data[x].get(RANK) == 3, data))
    random.shuffle(episode_names)
    episode_characters = get_entities(episode_names[:10], data)
    return jsonify(create_response_schema(episode_characters))
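# A hedged sketch of how the view functions above might be wired into a Flask
# application. The app object and the URL rules are assumptions for
# illustration; the actual routing lives in the application setup, not here.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/characters/main', view_func=get_main_characters)
app.add_url_rule('/characters/support', view_func=get_support_characters)
app.add_url_rule('/characters/episode', view_func=get_episode_characters)
app.add_url_rule('/characters/<name>', view_func=get_character_info)
app.add_url_rule('/characters/<name>/mentions', view_func=get_character_mentions)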
def run(self):
    logger = logging.getLogger(__name__)
    # Folder where to store snapshots, meta data and the final model
    storage_path = os.path.join(
        self.args.train_dir, (time.strftime('%Y-%m-%d_%H-%M_') + model.NAME))
    logger.info('Build Tensorflow Graph')
    with tf.Graph().as_default():
        # Define the used machine learning model
        global_step, learning_rate = model.learning_rate(
            self.args.learning_rate)
        self.sess = tf.Session(config=tf.ConfigProto(
            intra_op_parallelism_threads=8))
        logger.info('Create the data runner for the input data')
        self.custom_data_runner = CustomDataRunner(
            self.args.datafile_train, self.args.batch_size, 2**18)
        data_batch, cmd_batch = self.custom_data_runner.get_inputs()
        logger.info('Add operations to the computation graph')
        keep_prob_placeholder = tf.placeholder(
            tf.float32, name='keep_prob_placeholder')
        prediction = model.inference(data_batch, keep_prob_placeholder,
                                     self.args.batch_size,
                                     output_name='prediction')
        loss, loss_split = model.loss(prediction, cmd_batch)
        train_op = model.training(loss, loss_split, learning_rate,
                                  global_step)
        eval_data_placeholder, eval_cmd_placeholder = self.placeholder_inputs(
            1083, 2, 'eval_data_input')
        eval_prediction = model.inference(eval_data_placeholder,
                                          keep_prob_placeholder,
                                          self.eval_batch_size,
                                          training=False,
                                          reuse=True,
                                          output_name='eval_prediction')
        eval_predictions_placeholder = tf.placeholder(
            tf.float32, shape=[self.eval_n_elements, 2])
        evaluation, evaluation_split = model.evaluation(
            eval_predictions_placeholder, eval_cmd_placeholder)
        # This model is saved with the trained weights and can directly be executed
        exe_data_placeholder, exe_cmd_placeholder = self.placeholder_inputs(
            1083, 2)
        model_inference = model.inference(exe_data_placeholder,
                                          keep_prob_placeholder, 1,
                                          training=False,
                                          reuse=True,
                                          output_name='model_inference')
        # Variables to use in the summary (shown in tensorboard)
        train_loss = tf.scalar_summary('loss', loss)
        train_loss_lin = tf.scalar_summary('loss_linear_x', loss_split[0])
        train_loss_ang = tf.scalar_summary('loss_angular_yaw', loss_split[1])
        train_learning_rate = tf.scalar_summary('learning_rate',
                                                learning_rate)
        eval_loss = tf.scalar_summary('loss', evaluation)
        eval_loss_lin = tf.scalar_summary('loss_linear_x',
                                          evaluation_split[0])
        eval_loss_ang = tf.scalar_summary('loss_angular_yaw',
                                          evaluation_split[1])
        summary_op = tf.merge_summary([
            train_loss, train_loss_lin, train_loss_ang, train_learning_rate
        ])
        eval_summary_op = tf.merge_summary(
            [eval_loss, eval_loss_lin, eval_loss_ang])
        # Saver for model snapshots
        saver = tf.train.Saver()
        self.sess.run(tf.initialize_all_variables())
        # Start the tensorflow QueueRunner's
        self.coord = tf.train.Coordinator()
        self.runners = tf.train.start_queue_runners(sess=self.sess,
                                                    coord=self.coord)
        # Start our custom queue runner's threads
        self.custom_data_runner.start_threads(self.sess, self.coord)
        # Save summaries for training and evaluation in separate folders
        summary_writer = tf.train.SummaryWriter(
            os.path.join(storage_path, 'train'), self.sess.graph)
        eval_summary_writer = tf.train.SummaryWriter(
            os.path.join(storage_path, 'eval'), self.sess.graph)
        # Save the tensorflow graph definition as protobuf file (does not include weights)
        tf.train.write_graph(self.sess.graph_def, os.path.join(storage_path),
                             'graph.pb', False)  # proto
        # Vector to average the duration over the last report steps
        duration_vector = [0.0] * (self.args.eval_steps // 100)
        if self.args.weight_initialize:
            logger.info('Initialize with weights from another model')
            if os.path.exists(self.args.weight_initialize):
                saver.restore(self.sess, self.args.weight_initialize)
                logger.info('Model restored: {}'.format(
                    self.args.weight_initialize))
            else:
                logger.warning('No weights are loaded!')
                logger.warning('File does not exist: {}'.format(
                    self.args.weight_initialize))
        logger.info('Load the evaluation data')
        (X_eval, Y_eval) = DataHandler(self.args.datafile_eval,
                                       self.eval_n_elements,
                                       shuffle=False).next_batch()
        X_eval = self.check_extend(
            X_eval,
            np.ceil(self.eval_n_elements / self.eval_batch_size) *
            self.eval_batch_size)
        loss_train = 0.0
        # Perform all training steps
        logger.info('Training begins')
        for step in range(self.args.max_steps):
            start_time = time.time()
            feed_dict = {keep_prob_placeholder: 0.5}
            _, loss_value, loss_split_value, summary_str = self.sess.run(
                [train_op, loss, loss_split, summary_op],
                feed_dict=feed_dict)
            duration = time.time() - start_time
            # Report every 100 steps
            if step > 0 and step % 100 == 0:
                # Print status to stdout
                logger.info(
                    'Step {}: loss = ({:.4f},{:.4f}) {:.3f} msec'.format(
                        step, loss_split_value[0], loss_split_value[1],
                        duration / 1e-3))
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
                loss_train = loss_value
                # Replace the durations in fifo fashion
                duration_vector[((step % self.args.eval_steps) //
                                 100)] = duration
            # Evaluate the model
            if step > 0 and step % self.args.eval_steps == 0 or step == (
                    self.args.max_steps - 1):
                start_eval = time.time()
                # Create an empty array that is large enough to hold all predictions
                eval_predictions = np.zeros([X_eval.shape[0], 2],
                                            dtype=np.float)
                # Evaluate the data in batches and capture the predictions
                for index in range(X_eval.shape[0] // self.eval_batch_size):
                    start_index = index * self.eval_batch_size
                    end_index = (index + 1) * self.eval_batch_size
                    feed_dict = {
                        eval_data_placeholder:
                        X_eval[start_index:end_index, :],
                        keep_prob_placeholder: 1.0
                    }
                    eval_predictions[start_index:end_index, :] = self.sess.run(
                        [eval_prediction], feed_dict=feed_dict)[0]
                # Finally evaluate the predictions and compute the scores
                feed_dict = {
                    eval_predictions_placeholder:
                    eval_predictions[:self.eval_n_elements, :],
                    eval_cmd_placeholder: Y_eval,
                    keep_prob_placeholder: 1.0
                }
                loss_value, loss_split_value, summary_str = self.sess.run(
                    [evaluation, evaluation_split, eval_summary_op],
                    feed_dict=feed_dict)
                duration_eval = time.time() - start_eval
                logger.info(
                    'Evaluation: loss = ({:.4f},{:.4f}) {:.3f} msec'.format(
                        loss_split_value[0], loss_split_value[1],
                        duration_eval / 1e-3))
                eval_summary_writer.add_summary(summary_str, step)
                eval_summary_writer.flush()
            if step > 0 and step % 1000 == 0:
                # Save a checkpoint
                logger.info('Save model snapshot')
                filename = os.path.join(storage_path, 'snap')
                saver.save(self.sess, filename, global_step=step)
        logger.info('Save final model snapshot')
        filename = os.path.join(storage_path, 'final')
        saver.save(self.sess, filename)
        # Save the model with weights in one file.
        # This will only capture the operations used to generate the
        # prediction. It also replaces the variables with the weights from
        # training as constant values.
        # See:
        # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py
        logger.info('Save final model with weights')
        output_node_names = 'model_inference'
        output_graph_def = tf.python.client.graph_util.convert_variables_to_constants(
            self.sess, self.sess.graph_def, output_node_names.split(","))
        with tf.gfile.GFile(os.path.join(storage_path, 'model.pb'),
                            "wb") as f:
            f.write(output_graph_def.SerializeToString())
        logger.info("{} ops in the final graph.".format(
            len(output_graph_def.node)))
    if self.args.mail:
        self.send_notification(loss_train, loss_value)
def test_something(self):
    dh = DataHandler()
    dh.parse_data("design.json")
from data.data_loader import DataLoader
from data.data_handler import DataHandler
from data.order_parser import OrderParser

DataLoader = DataLoader()
DataHandler = DataHandler()
OrderParser = OrderParser
parser.add_argument('-i', action='store', dest='id_upper', type=int)
parser.add_argument('-c', action='store_true', help='train classifier')
parser.add_argument('-f', action='store', dest='file', type=str,
                    help='parse a clothes image')
args = parser.parse_args()

if args.cmd == 'train':
    if args.b:
        if args.s:
            train_bow_sift(args.id_upper)
        elif args.p:
            train_bow_pixel()
    if args.c:
        train_clf('pixel')

if args.cmd == 'test':
    kmeans = KmeansModel()
    kmeans.load('kmeans_pixel')
    clf = RandomForest()
    clf.load()
    data = DataHandler()
    data.load()
    if args.file:
        for res in clf.predict(kmeans, args.file):
            print int(res), data.tell_label(int(res))

if args.cmd == 'data':
    data = DataHandler()
    data.parse_data('design.json')
    data.save()
# Climb up the directory tree until we reach the project root
root = root.parent
if str(root) not in sys.path:
    sys.path.append(str(root))

from metrics.abcd import ABCD
from data.data_handler import DataHandler
from prediction.model import PredictionModel

import warnings
warnings.filterwarnings("ignore")

if __name__ == "__main__":
    dh = DataHandler()
    data = dh.get_data(top_k=1)
    for _, val in data.items():
        data = val
    X = data[data.columns[:-1]]
    y = data[data.columns[-1]]
    # lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X, y)
    # model = SelectFromModel(lsvc, prefit=True)
    # X = model.transform(X)
    pca = PCA(n_components=3)
    pca.fit(X)
    X = pca.transform(X)
    colors = ['navy', 'darkorange']
    for X_transformed, title in [(X, "PCA")]:
from config import cifar_configs
from train.trainer import Trainer
from data.data_handler import DataHandler

if __name__ == "__main__":
    # parser = argparse.ArgumentParser()
    # parser.add_argument("--config-file", default="./config/train_config.yaml", metavar="FILE", type=str)
    # args = parser.parse_args()
    # # extract config
    # config_file = open(args.config_file, 'r')
    # configs = yaml.load(config_file)
    datahandler = DataHandler(cifar_configs)
    trainer = Trainer(cifar_configs, datahandler)
    trainer.train()
from pdb import set_trace
from prettytable import PrettyTable
from pathlib import Path

root = Path(os.path.abspath(os.path.join(os.getcwd().split("src")[0], 'src')))
if str(root) not in sys.path:
    sys.path.append(str(root))

from metrics.abcd import ABCD
from data.data_handler import DataHandler
from prediction.model import PredictionModel

if __name__ == "__main__":
    dh = DataHandler()
    mdl = PredictionModel()
    data = dh.get_data()
    "Create a table that can be pretty printed"
    results = PrettyTable()
    results.field_names = ["Train", "Test ", " Pd", " Pf", " F1"]
    "Align data"
    results.align["Train"] = "l"
    results.align["Test "] = "l"
    results.align[" Pd"] = "r"
    results.align[" Pf"] = "r"
    results.align[" F1"] = "r"
    for proj, dataset in data.items():
def get_tweets(self):
    api = Api.twitter_api()
    # Fetch the first page of the user's timeline, then keep paging backwards
    tweets = api.user_timeline(screen_name=self.tt_user,
                               count=200,
                               tweet_mode='extended',
                               include_rts=False,
                               exclude_replies=True)
    last_id = tweets[-1].id
    while True:
        more_tweets = api.user_timeline(screen_name=self.tt_user,
                                        count=200,
                                        include_rts=False,
                                        exclude_replies=True,
                                        max_id=last_id - 1)
        if len(more_tweets) == 0:
            break
        else:
            last_id = more_tweets[-1].id - 1
            tweets = tweets + more_tweets
    created = []
    tweet_id = []
    text = []
    hashtags = []
    symbols = []
    image_url = []
    user_mentions = []
    user_id = []
    user_name = []
    user_screen_name = []
    user_location = []
    user_description = []
    user_protected = []
    user_followers_count = []
    user_friends_count = []
    user_listed_count = []
    user_created_at = []
    user_favourites_count = []
    user_utc_offset = []
    user_timezone = []
    user_geo_enabled = []
    user_verified = []
    user_statuses_count = []
    user_lang = []
    user_contributors_enabled = []
    user_is_translator = []
    user_is_translation_enabled = []
    quoted_status = []
    quoted_text = []
    quoted_media = []
    quoted_user_id = []
    for tweet in tweets:
        created.append(tweet.created_at)
        tweet_id.append(tweet.id)
        try:
            text.append(tweet.full_text)
        except AttributeError:
            text.append(tweet.text)
        hashtags.append(tweet.entities['hashtags'])
        symbols.append(tweet.entities['symbols'])
        user_mentions.append(tweet.entities['user_mentions'])
        user_id.append(tweet.user.id)
        user_name.append(tweet.user.name)
        user_screen_name.append(tweet.user.screen_name)
        user_location.append(tweet.user.location)
        user_description.append(tweet.user.description)
        user_protected.append(tweet.user.protected)
        user_followers_count.append(tweet.user.followers_count)
        user_friends_count.append(tweet.user.friends_count)
        user_listed_count.append(tweet.user.listed_count)
        user_created_at.append(tweet.user.created_at.strftime("%Y-%m-%d"))
        user_favourites_count.append(tweet.user.favourites_count)
        user_utc_offset.append(tweet.user.utc_offset)
        user_timezone.append(tweet.user.time_zone)
        user_geo_enabled.append(tweet.user.geo_enabled)
        user_verified.append(tweet.user.verified)
        user_statuses_count.append(tweet.user.statuses_count)
        user_lang.append(tweet.user.lang)
        user_contributors_enabled.append(tweet.user.contributors_enabled)
        user_is_translator.append(tweet.user.is_translator)
        user_is_translation_enabled.append(
            tweet.user.is_translation_enabled)
        if tweet.is_quote_status:
            try:
                quoted_text.append(tweet.quoted_status.text)
            except AttributeError:
                quoted_text.append(np.nan)
            try:
                quoted_user_id.append(tweet.quoted_status.user.id)
            except AttributeError:
                quoted_user_id.append(np.nan)
            try:
                quoted_media.append(
                    tweet.quoted_status.entities['media'][0]['media_url'])
            except Exception:
                quoted_media.append(np.nan)
        else:
            quoted_text.append(np.nan)
            quoted_user_id.append(np.nan)
            quoted_media.append(np.nan)
        try:
            image_url.append(tweet.entities['media'][0]['media_url'])
        except Exception:
            image_url.append(np.nan)
    dataset = pd.DataFrame({
        "created_at": created,
        "tweet_id": tweet_id,
        "text": text,
        "hashtags": hashtags,
        "symbols": symbols,
        "image_url": image_url,
        "user_mentions": user_mentions,
        "user_id": user_id,
        "user_name": user_name,
        "user_screen_name": user_screen_name,
        "user_location": user_location,
        "user_description": user_description,
        "user_protected": user_protected,
        "user_followers_count": user_followers_count,
        "user_friends_count": user_friends_count,
        "user_listed_count": user_listed_count,
        "user_created_at": user_created_at,
        "user_favourites_count": user_favourites_count,
        "user_utc_offset": user_utc_offset,
        "user_timezone": user_timezone,
        "user_geo_enabled": user_geo_enabled,
        "user_verified": user_verified,
        "user_statuses_count": user_statuses_count,
        "user_lang": user_lang,
        "user_contributors_enabled": user_contributors_enabled,
        "user_is_translator": user_is_translator,
        "user_is_translation_enabled": user_is_translation_enabled,
        "quoted_text": quoted_text,
        "quoted_media": quoted_media,
        "quoted_user_id": quoted_user_id,
    })
    store_data = DataHandler('twitter', self.tt_user)
    store_data.store_network_dataset(dataset)
#     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
# ])
transform_test = transforms.Compose([
    transforms.ToPILImage(),
    # transforms.ToTensor(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
# transforms = [transform_train, transform_test]
dataset_configs = (train_dataset_config, test_dataset_config)
datahandler = DataHandler(ds_class=(MenWomenDataset, MenWomenDataset),
                          transforms=None,
                          dataset_configs=dataset_configs,
                          configs=cfgs)
# datahandler.show_batch(1, 6, mode='train')

# Trainer building
trainer_configs = {
    'model_path': '',
    'validate': 0.7,
    'lr': 0.001,
    'num_epochs': 10,
    'steps_save_loss': 2,
    'output_folder': 'C:\\Users\\thanhdh6\\Documents\\projects\\vinbrain_internship\\image_classifier\\train\\logs',
    'device': 'cpu',
    'gpu_id': 0,
    'lr_schedule': None,