def make_plots(tweets, box, place):
    """Make plots from the extracted tweet data.

    Args:
        tweets: Iterable of tweets already filtered and within the
            bounding box.
        box: A pair of longitude and latitude pairs, with the southwest
            corner of the bounding box coming first.
        place: String for the place name of the bounding `box`.
    """
    print('Extracting tweets ...', end=' ')
    data = list(process.extract_data(tweets))
    print('DONE')

    print('Extracting census blocks ...', end=' ')
    census_path = config.get('census', 'path')
    census_blocks = config.get('census', 'blocks')
    blocks = process.extract_blocks(os.path.join(census_path, census_blocks))
    print('DONE')

    print('Processing data ...', end=' ')
    longitude, latitude, time, users = process.process_data(data)
    print('DONE')

    print('Computing census block interactions ...', end=' ')
    interactions = process.compute_block_interactions(users, blocks)
    print('DONE')

    print('Saving census block interactions ...', end=' ')
    with open('census-block-interactions.tsv', mode='w') as f:
        process.dump_interactions(interactions, f)
    print('DONE')

    print('Making figures ...', end=' ')
    figures = []
    figures.append(make_map(longitude, latitude, time, box, place))
    figures.append(make_user_map(longitude, latitude, users, box, place))
    figures.append(make_user_checkins(users))
    figures.append(make_heatmap(longitude, latitude, box, place))
    figures.append(make_time(time))
    print('DONE')

    print('Saving figures ...', end=' ')
    for figure in figures:
        figure.savefig('{0}.png'.format(figure.get_label()),
                       bbox_inches='tight', pad_inches=0.1)
    print('DONE')

    plt.show()
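# The saving loop above names each PNG via figure.get_label(), so every
# make_* helper must return a labeled matplotlib figure. A minimal sketch of
# one such helper, assuming matplotlib; the plot type and label string are
# illustrative guesses, not the original implementation.
import matplotlib.pyplot as plt

def make_time(time):
    fig, ax = plt.subplots()
    ax.hist(time, bins=48)           # tweet volume over time
    ax.set_xlabel('time')
    ax.set_ylabel('tweet count')
    fig.set_label('tweet-time')      # consumed by figure.get_label() above
    return fig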
def __getitem__(self, item):
    data = process_data(self.tweet[item],
                        self.selected_text[item],
                        self.sentiment[item],
                        self.tokenizer,
                        self.max_len)
    return {
        'ids': torch.tensor(data["ids"], dtype=torch.long),
        'mask': torch.tensor(data["mask"], dtype=torch.long),
        'token_type_ids': torch.tensor(data["token_type_ids"], dtype=torch.long),
        'targets_start': torch.tensor(data["targets_start"], dtype=torch.long),
        'targets_end': torch.tensor(data["targets_end"], dtype=torch.long),
        'orig_tweet': data["orig_tweet"],
        'orig_selected': data["orig_selected"],
        'sentiment': data["sentiment"],
        'offsets': torch.tensor(data["offsets"], dtype=torch.long),
    }
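# Hypothetical usage of the dataset above: because __getitem__ returns a dict
# of fixed-length tensors, it batches cleanly under DataLoader's default
# collate function. TweetDataset, df, and the constructor keywords are
# assumed names, not from the original snippet.
import torch

dataset = TweetDataset(
    tweet=df.text.values,
    selected_text=df.selected_text.values,
    sentiment=df.sentiment.values,
    tokenizer=tokenizer,
    max_len=128,
)
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
batch = next(iter(loader))
print(batch['ids'].shape)  # -> torch.Size([32, 128])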
def main():
    env = sys.argv[1]
    db_details = DB_DETAILS[env]
    source_db = db_details['SOURCE_DB']
    target_db = db_details['TARGET_DB']

    logger.add("data-copier.info",
               rotation="1 MB",
               retention="10 days",
               level="INFO")
    logger.add("data-copier.err",
               rotation="1 MB",
               retention="10 days",
               level="ERROR")

    # Establishing connection to MySQL DB
    mysql_conn = get_connection(source_db)

    # Reading data from base tables through DataFrames
    logger.info('Reading data')
    df_dim_products, df_dim_customers, df_fact_product_revenue_dly, \
        df_fact_revenue_dly = process_data(mysql_conn)

    # Loading data into facts and dim tables in Postgres
    logger.info('Loading data')
    load_data(df_dim_products, df_dim_customers,
              df_fact_product_revenue_dly, df_fact_revenue_dly,
              target_db)
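# Hypothetical shape of DB_DETAILS, inferred from the lookups in main();
# the environment key and the fields inside each connection dict are
# assumptions, not the original config.
DB_DETAILS = {
    'dev': {
        'SOURCE_DB': {
            'DB_TYPE': 'mysql',
            'DB_HOST': 'localhost',
            'DB_NAME': 'retail_db',
            'DB_USER': 'retail_user',
            'DB_PASS': 'changeme',
        },
        'TARGET_DB': {
            'DB_TYPE': 'postgres',
            'DB_HOST': 'localhost',
            'DB_NAME': 'retail_dw',
            'DB_USER': 'retail_user',
            'DB_PASS': 'changeme',
        },
    },
}
# Invoked as, e.g.: python data_copier.py dev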
pgpassword = ''
pgdatabase = 'popnet'

# DIFFERENT PATHS -------------------------------------------------------------

# Get path to main script
python_script_dir = os.path.dirname(os.path.abspath(__file__))

# Paths for the data / folders in the Project_data folder ---------------------

# Path to ancillary data folder
ancillary_data_folder_path = os.path.join(python_script_dir, "data", "ancillary")

# Path to GADM folder
gadm_folder_path = os.path.join(python_script_dir, "data", "GADM")

# Path to GHS folder
ghs_folder_path = os.path.join(python_script_dir, "data", "GHS")

# Paths to storage during the data preparation (AUTOMATICALLY CREATED) --------

# Path to temp folder - will contain temporary files
temp_folder_path = os.path.join(python_script_dir, "temp")

# Folder for files to be merged
merge_folder_path = os.path.join(python_script_dir, "Tif_to_merge")

# Path to data folder storing the final tif files
finished_data_path = os.path.join(python_script_dir, "Finished_data")

# Process all data ------------------------------------------------------------
process_data(country, pgpath, pghost, pgport, pguser, pgpassword, pgdatabase,
             ancillary_data_folder_path, gadm_folder_path, ghs_folder_path,
             temp_folder_path, merge_folder_path, finished_data_path,
             init_prep, init_import_to_postgres, init_run_queries,
             init_export_data, init_rasterize_data, init_merge_data)
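# The three storage folders above are marked "AUTOMATICALLY CREATED", so they
# must exist before process_data() writes into them. A minimal sketch of that
# setup step, assuming os is already imported at the top of the script.
for folder in (temp_folder_path, merge_folder_path, finished_data_path):
    os.makedirs(folder, exist_ok=True)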
from input import import_data
from process import process_data

# Read and preprocess the data
data = import_data('data.txt')
data = process_data(data)
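# Minimal sketches of the two helpers above, assuming data.txt holds
# whitespace-delimited numeric rows; the real input/process modules may
# well differ.
def import_data(path):
    # Read the raw file into a list of token lists, one per line.
    with open(path) as f:
        return [line.split() for line in f]

def process_data(rows):
    # Convert every token to a float so downstream code gets numbers.
    return [[float(x) for x in row] for row in rows]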
from process import process_data
import config
import joblib
import torch
from sklearn import model_selection
from model import EntityModel
from dataset import EntityDataset
from transformers import AdamW, get_linear_schedule_with_warmup
from engine import train_fn, eval_fn
import numpy as np

if __name__ == '__main__':
    sentences, pos, tag, enc_pos, enc_tag = process_data(config.TRAINING_FILE)

    meta_data = {"enc_pos": enc_pos, "enc_tag": enc_tag}
    joblib.dump(meta_data, "meta.bin")

    num_pos = len(list(enc_pos.classes_))
    num_tag = len(list(enc_tag.classes_))

    (train_sentences, test_sentences,
     train_pos, test_pos,
     train_tag, test_tag) = model_selection.train_test_split(
        sentences, pos, tag, random_state=42, test_size=0.1)

    train_dataset = EntityDataset(texts=train_sentences, pos=train_pos,
                                  tag=train_tag)
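# Hypothetical continuation, mirroring the train split above; the batch
# sizes and the EntityModel signature are assumptions based on the names
# already imported (EntityModel, train_fn, eval_fn), not the original code.
valid_dataset = EntityDataset(texts=test_sentences, pos=test_pos,
                              tag=test_tag)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32,
                                           shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=8)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = EntityModel(num_tag=num_tag, num_pos=num_pos).to(device)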
import numpy as np
import process
from sklearn import ensemble
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier


def get_data():
    # The start of this function (loading `data` and setting the train
    # fraction `percent`) is missing from this excerpt.
    ...
    data = np.array(data)
    train_size = int(data.shape[0] * percent)
    train = data[0:train_size, :]
    test = data[train_size:, :]

    # Last column is the label; everything before it is the feature matrix.
    x_train = train[:, 0:-1]
    y_train = train[:, -1]
    x_test = test[:, 0:-1]
    y_test = test[:, -1]
    return x_train, y_train, x_test, y_test


# Begin to train
if __name__ == "__main__":
    process.process_data()
    x_train, y_train, x_test, y_test = get_data()

    # AdaBoostClassifier expects an *instance* of the base estimator; the
    # original passed the DecisionTreeClassifier class itself, which fails at
    # fit() time. The second positional argument is n_estimators.
    for n_estimators in (1, 2, 5):
        adaBoost = ensemble.AdaBoostClassifier(DecisionTreeClassifier(),
                                               n_estimators=n_estimators)
        adaBoost.fit(x_train, y_train)
        ytest_ = adaBoost.predict(x_test)
        print(classification_report(y_test, ytest_))
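# For comparison, a more conventional AdaBoost configuration: decision stumps
# as the weak learner with a larger ensemble. Purely illustrative; these
# parameter values are not from the original script.
stump_boost = ensemble.AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),  # decision stumps
    n_estimators=100,
    learning_rate=0.5,
)
stump_boost.fit(x_train, y_train)
print(classification_report(y_test, stump_boost.predict(x_test)))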
parser.add_argument("fine_tune")
parser.add_argument("--hidden_size", default=512, type=int)
parser.add_argument("--device", default="cuda")
parser.add_argument("--batch_size", default=32, type=int)
args = parser.parse_args()

# Leftover experiment lists; unused in the code below.
lang_model_names = ["scibert-base-cased", "xlnet-base-cased"]
fine_tunes = [False]

lang_model, tokenizer, lm_emb_size = parse_lang_model(args.lang_model_name)
vocab_size = len(tokenizer)
device = torch.device(args.device)

train_loader = process_data(args.train_data, tokenizer, device,
                            args.lang_model_name,
                            batch_size=args.batch_size)
dev_loader = process_data(args.dev_data, tokenizer, device,
                          args.lang_model_name, is_dev=True,
                          batch_size=args.batch_size)

model = LangModelWithDense(lang_model, lm_emb_size, args.hidden_size,
                           num_classes, args.fine_tune).to(device)
print(model)

print("Using model: {}".format(args.lang_model_name))
print("Using device: {}".format(device))
# Caution: a positional argparse value arrives as a string, and any
# non-empty string (including "False") is truthy if fine_tune is later
# used as a boolean flag.
print("Using fine-tuning: {}".format(args.fine_tune))
print()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
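# Hypothetical sketch of parse_lang_model, assuming Hugging Face transformers
# and that the model name maps directly to a hub checkpoint; the real helper
# may resolve aliases like "scibert-base-cased" differently.
from transformers import AutoModel, AutoTokenizer

def parse_lang_model(name):
    tokenizer = AutoTokenizer.from_pretrained(name)
    lang_model = AutoModel.from_pretrained(name)
    return lang_model, tokenizer, lang_model.config.hidden_size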
def main():
    env = sys.argv[1]
    db_details = DB_DETAILS[env]
    source_db = db_details['SOURCE_DB']
    target_db = db_details['TARGET_DB']

    # Read data from retail_db
    mysql_conn = get_connection(source_db)

    # Process data using pandas
    (dim_products_df, dim_customers_df, fact_product_revenue_dly_df,
     fact_revenue_dly_df) = process_data(mysql_conn)

    # Write the data to retail_dw
    load_data(dim_products_df, dim_customers_df,
              fact_product_revenue_dly_df, fact_revenue_dly_df,
              target_db)
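# Hypothetical sketch of get_connection for the MySQL source side; the driver
# choice (pymysql) and the key names inside the connection dict are
# assumptions, not the original implementation.
import pymysql

def get_connection(db):
    return pymysql.connect(host=db['DB_HOST'],
                           user=db['DB_USER'],
                           password=db['DB_PASS'],
                           database=db['DB_NAME'])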
import sys

import numpy as np

from process import process_data
from fontANN import feedforward

# Read the three input features from the command line and preprocess them.
X = np.array([float(sys.argv[1]), float(sys.argv[2]), float(sys.argv[3])])
X = process_data(X)

# Load the trained weights and biases.
npzfile = np.load('matrix.npz')
W2 = npzfile['W2']
B2 = npzfile['B2']
W1 = npzfile['W1']
B1 = npzfile['B1']

# Run the network and print the rounded prediction (e.g. 1).
Y, Z = feedforward(X, W1, W2, B1, B2)
print(int(np.rint(Y[0])))
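# Minimal sketch of what fontANN.feedforward might compute, assuming a
# two-layer sigmoid network consistent with the W1/B1/W2/B2 weights loaded
# above; the real implementation may differ.
def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def feedforward(X, W1, W2, B1, B2):
    Z = sigmoid(X.dot(W1) + B1)  # hidden-layer activations
    Y = sigmoid(Z.dot(W2) + B2)  # output-layer activations
    return Y, Z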
args = parser.parse_args()

lang_model, tokenizer, lm_emb_size = parse_lang_model(args.lang_model)
vocab_size = len(tokenizer)
device = torch.device(args.device)

print("Using model: {}".format(args.lang_model))
print("Using device: {}".format(device))
print("Using fine-tuning: {}".format(args.fine_tune))
print()

train_loader = process_data(args.train_data, tokenizer, device,
                            train_data=True, fine_tune=args.fine_tune,
                            batch_size=args.batch_size)
dev_loader = process_data(args.dev_data, tokenizer, device,
                          fine_tune=args.fine_tune,
                          batch_size=args.batch_size)

model = LangModelWithDense(lang_model, vocab_size, lm_emb_size,
                           args.hidden_size, args.fine_tune).to(device)
print(model)

epochs = 10
total_steps = len(train_loader) * epochs
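# Hypothetical continuation: total_steps above is exactly what transformers'
# linear-warmup scheduler expects. train_epoch and evaluate are assumed
# helper names, not from the original script.
from transformers import get_linear_schedule_with_warmup

optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=total_steps)

for epoch in range(epochs):
    train_epoch(model, train_loader, optimizer, scheduler)  # assumed helper
    evaluate(model, dev_loader)                             # assumed helper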