def login(crversion, url, login, password, sessionname):
    IsVersionSupported, CallType, ApiUri, Headers, Body = get_login_resources(
        crversion, login, password, sessionname)
    if not IsVersionSupported:
        logging.debug("Unsupported CR Version: {}".format(crversion))
        print("Unsupported CR Version")
        exit(1)
    FULLURL = urllib.parse.urljoin(url, ApiUri)
    response = requests.request(CallType, FULLURL, data=Body, headers=Headers)
    isAPICallOK = StdResponses.processAPIResponse(response)
    if not isAPICallOK:
        exit(99)
    else:
        isError, Code = AuthResponses.Process_Auth_Login_Response(response)
        if not isError:
            DataUtils.StoreAuthToken(Code, sessionname)
            DataUtils.StoreUrl(url, sessionname)
            DataUtils.StoreCRVersion(crversion, sessionname)
            print("Token Stored in session: " + sessionname)
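# Hedged usage sketch (not from the source): how login() might be invoked.
# Every value below is an illustrative placeholder; the real CR version
# string, URL and credentials depend on the target Control Room.
#
# login("11", "https://controlroom.example.com", "apiuser",
#       "apipassword", "mysession")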
def generic_api_call_handler(outputFormat, sessionname, get_res_func,
                             res_data, df_transform_func):
    url = DataUtils.GetUrl(sessionname)
    TOKEN = DataUtils.GetAuthToken(sessionname)
    CRVERSION = DataUtils.GetCRVersion(sessionname)
    IsVersionSupported, CallType, ApiUri, Headers, Body = get_res_func(
        CRVERSION, sessionname, TOKEN, res_data)
    if not IsVersionSupported:
        # Bug fix: the original logged the undefined name `crversion`
        logging.debug("Unsupported CR Version: {}".format(CRVERSION))
        print("Unsupported CR Version")
        exit(1)
    FULLURL = urllib.parse.urljoin(url, ApiUri)
    response = requests.request(method=CallType, url=FULLURL, data=Body,
                                headers=Headers)
    isAPICallOK = StdResponses.processAPIResponse(response)
    if not isAPICallOK:
        exit(99)
    else:
        json_object = json.loads(response.text)
        if outputFormat == "DF":
            aDF = df_transform_func(json_object)
            print(aDF)
        elif outputFormat == "CSV":
            aDF = df_transform_func(json_object)
            print(aDF.to_csv(index=False))
        else:
            json_formatted_str = json.dumps(json_object, indent=2)
            print(json_formatted_str)
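# Hedged sketch (assumption, not from the source): the contract that
# generic_api_call_handler expects from its callbacks. get_res_func must
# return the 5-tuple (IsVersionSupported, CallType, ApiUri, Headers, Body),
# and df_transform_func must turn the decoded JSON into a pandas DataFrame.
# The URI and the "list" JSON key below are hypothetical examples.
import pandas as pd

def example_get_res_func(crversion, sessionname, token, res_data):
    headers = {"Content-Type": "application/json", "X-Authorization": token}
    return True, "POST", "/v2/some/endpoint", headers, res_data

def example_df_transform_func(json_object):
    # Flatten a hypothetical "list" array of records into a DataFrame
    return pd.DataFrame(json_object.get("list", []))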
def build_abstract_tuples():
    abstracts_directory = Config.extracted_abstracts_dir
    revision_ids_directory = Config.extracted_revision_ids_dir
    abstracts_filenames = sorted(os.listdir(abstracts_directory))
    revision_ids_filenames = sorted(os.listdir(revision_ids_directory))
    for abstract_filename, revision_ids_filename in zip(
            abstracts_filenames, revision_ids_filenames):
        tuples = list()
        abstracts = DataUtils.load_json(abstracts_directory, abstract_filename)
        revision_ids = DataUtils.load_json(revision_ids_directory,
                                           revision_ids_filename)
        for page_name in abstracts:
            page_url = ('http://fa.wikipedia.org/wiki/'
                        + page_name.replace(' ', '_'))
            json_dict = dict()
            json_dict['template_name'] = None
            json_dict['template_type'] = None
            json_dict['subject'] = page_url
            json_dict['predicate'] = 'abstract'
            json_dict['object'] = abstracts[page_name]
            json_dict['source'] = page_url
            json_dict['version'] = revision_ids[page_name]
            tuples.append(json_dict)
        DataUtils.save_json(Config.final_abstract_tuples_dir,
                            abstract_filename, tuples)
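# Illustrative shape (placeholder values, not real data) of one record
# appended to `tuples` above:
#
# {
#     'template_name': None,
#     'template_type': None,
#     'subject': 'http://fa.wikipedia.org/wiki/Some_Page',
#     'predicate': 'abstract',
#     'object': '<abstract text of the page>',
#     'source': 'http://fa.wikipedia.org/wiki/Some_Page',
#     'version': '<revision id>',
# }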
def callbackMain(self):
    app = App.get_running_app()
    data_dir = app.user_dir + "/datasets"
    dataUtils = DataUtils(data_dir)
    dataUtils.mergeFiles()  # creates datasets for NN training
    self.ids["up"].clear_widgets()
    sm.current = "Main"
def extract_image_names_from_sql_dump():
    image_names_types = dict()
    # https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
    max_int = sys.maxsize
    while True:
        # Decrease the max_int value by a factor of 10
        # as long as the OverflowError occurs.
        try:
            csv.field_size_limit(max_int)
            break
        except OverflowError:
            max_int = int(max_int / 10)
    all_records = SqlUtils.get_sql_rows(Config.fawiki_latest_images_dump,
                                        quotechar='"')
    for record in all_records:
        for columns in record:
            image_name, image_type = (columns[0].strip("'"),
                                      columns[8].strip("'"))
            image_names_types[image_name] = image_type
    DataUtils.save_json(Config.extracted_image_names_types_dir,
                        Config.extracted_image_names_types_filename,
                        image_names_types)
def get_learning_instance_detail(learningInstanceName="",
                                 learningInstanceID="", sessionname="",
                                 CsvOutput=False, ProcessOutput=True):
    if learningInstanceName != "":
        learningInstanceID = IQBotCommons.ConvertLINameToLIID(
            sessionname, learningInstanceName)
    URL = urllib.parse.urljoin(DataUtils.GetUrl(sessionname),
                               get_LI_DETAIL_URI(learningInstanceID))
    headers = {
        'Content-Type': "application/json",
        'cache-control': "no-cache",
        'X-Authorization': DataUtils.GetAuthToken(sessionname)
    }
    response = requests.request(LI_DETAIL_REQ_TYPE, URL, headers=headers)
    if ProcessOutput:
        # Bug fix: the original assigned the error flag but never returned it
        isInError = IQBotLIResponses.Process_LI_Detail_Response(response,
                                                                CsvOutput)
        return isInError
    return response
def save_sql_dump(directory, filename, sql_dump, encoding='utf8'):
    if len(directory) < 255:
        DataUtils.create_directory(directory)
    sql_filename = join(directory, filename)
    with open(sql_filename, 'w+', encoding=encoding) as sql_file:
        sql_file.write(sql_dump)
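# Hedged usage sketch: persisting one dump chunk. The path and SQL content
# are illustrative placeholders only.
#
# save_sql_dump("/tmp/sql_chunks", "chunk_000.sql",
#               "INSERT INTO `page` VALUES (1, 0, 'Main_Page');")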
def build_category_tuples():
    category_directory = Config.extracted_category_links_dir
    category_filename = Config.extracted_category_links_filename
    categories = DataUtils.load_json(category_directory, category_filename)
    revision_ids_directory = Config.extracted_revision_ids_dir
    revision_ids_filenames = sorted(os.listdir(revision_ids_directory))
    for revision_ids_filename in revision_ids_filenames:
        revision_ids = DataUtils.load_json(revision_ids_directory,
                                           revision_ids_filename)
        tuples = list()
        for page_name in revision_ids:
            page_name = page_name.replace(' ', '_')
            if page_name in categories:
                page_categories = categories[page_name]
                page_url = 'http://fa.wikipedia.org/wiki/' + page_name
                for page_category in page_categories:
                    json_dict = dict()
                    json_dict['template_name'] = None
                    json_dict['template_type'] = None
                    json_dict['subject'] = page_url
                    json_dict['predicate'] = 'wikiCategory'
                    json_dict['object'] = page_category.replace('_', ' ')
                    json_dict['source'] = page_url
                    json_dict['version'] = revision_ids[
                        page_name.replace('_', ' ')]
                    tuples.append(json_dict)
        DataUtils.save_json(Config.final_category_tuples_dir,
                            revision_ids_filename, tuples)
def reorganize_infoboxes():
    reorganized_infoboxes = dict()
    directory = Config.extracted_with_infobox_dir
    filenames = DataUtils.get_infoboxes_filenames(directory)
    for filename in filenames:
        infoboxes = DataUtils.load_json(directory, filename)
        for infobox_name in infoboxes:
            template_name, infobox_type = DataUtils.get_infobox_name_type(
                infobox_name)
            if infobox_type not in reorganized_infoboxes:
                reorganized_infoboxes[infobox_type] = dict()
            if infobox_name not in reorganized_infoboxes[infobox_type]:
                reorganized_infoboxes[infobox_type][infobox_name] = dict()
            for page_name in infoboxes[infobox_name]:
                reorganized_infoboxes[infobox_type][infobox_name][page_name] = \
                    infoboxes[infobox_name][page_name]
    for infobox_type in reorganized_infoboxes:
        for infobox_name in reorganized_infoboxes[infobox_type]:
            infobox_name_type_path = join(Config.reorganized_infoboxes_dir,
                                          infobox_type, infobox_name)
            DataUtils.save_json(infobox_name_type_path, 'infoboxes',
                                reorganized_infoboxes[infobox_type][infobox_name])
def listTorrents(outputFormat, sessionname):
    url = DataUtils.GetUrl(sessionname)
    SID = DataUtils.GetAuthToken(sessionname)
    DSMVERSION = DataUtils.GetDSMVersion(sessionname)
    IsVersionSupported, URLParams, headers = get_torrent_list_url(DSMVERSION,
                                                                  SID)
    if not IsVersionSupported:
        logging.debug("Unsupported DSM Version: {}".format(DSMVERSION))
        print("Unsupported DSM Version")
        exit(1)
    FULLURL = urllib.parse.urljoin(url, URLParams)
    response = requests.request("GET", FULLURL, data=None, headers=headers)
    isAPICallOK = StdResponses.processAPIResponse(response)
    if not isAPICallOK:
        exit(99)
    else:
        json_object = json.loads(response.text)
        if outputFormat == "DF":
            aDF = DSMTransformers.GetListAsCsv(json_object)
            print(aDF)
        elif outputFormat == "CSV":
            aDF = DSMTransformers.GetListAsCsv(json_object)
            print(aDF.to_csv(index=False))
        else:
            json_formatted_str = json.dumps(json_object, indent=2)
            print(json_formatted_str)
def pre_deal_data(alpha=1, beta=1):
    """
    :param alpha: weight for running time
    :param beta: weight for lines of code
    """
    for case_id in raw_case_map.keys():
        timeList = []
        lineList = []
        for raw_case in raw_case_map[case_id]:
            timeList.append(raw_case.time)
            lineList.append(raw_case.line)
        timeAVG = np.average(timeList)
        timeVAR = np.var(timeList)
        lineAVG = np.average(lineList)
        lineVAR = np.var(lineList)
        for raw_case in raw_case_map[case_id]:
            temp = raw_case.copy()
            time = DataUtils.omega(raw_case.time, timeAVG, timeVAR)
            line = DataUtils.omega(raw_case.line, lineAVG, lineVAR)
            temp.score = temp.score * time**alpha * line**beta
            student_case_map[temp.user_id][temp.case_id] = temp
            case_student_map[temp.case_id][temp.user_id] = temp
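# Hedged sketch (assumption): DataUtils.omega is not shown in the source.
# Given that it takes a value together with the cohort mean and variance and
# its result is used as a multiplicative weight, one plausible form is a
# Gaussian-style weight that damps outliers; the real implementation may
# well differ.
import math

def omega_sketch(x, avg, var):
    if var == 0:
        return 1.0  # degenerate cohort: every value equals the mean
    return math.exp(-((x - avg) ** 2) / (2.0 * var))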
def Process_Auth_Login_Response(res, url, sessionname):
    if res.status_code >= 400:
        print("Error Code: " + str(res.status_code))
        try:
            result = json.loads(res.text)
            if result['message']:
                print("Error Message: " + result['message'])
        except Exception:
            pass
        return True
    try:
        result = json.loads(res.text)
        token = result['token']
        print("Token Stored in session: " + sessionname)
        DataUtils.StoreAuthToken(token, sessionname)
        DataUtils.StoreUrl(url, sessionname)
        return False
    except Exception:
        print("Unknown Error.")
        return True
def run(data, function, error, alpha, epsilion=1e-9, reg_type="L2GD",
        lamdas=[], degree=0):
    global f
    f = function
    # Bug fix: the original had a stray line-continuation backslash here
    train, validate = DataUtils.data_split(data, split_at=0.8)
    x_train, y_train = DataUtils.xy_split(train)
    x_val, y_val = DataUtils.xy_split(validate)
    print(f"Starting Regularized Gradient Descent\n"
          f" with alpha={alpha}, epsilion={epsilion}\n")
    x_train.insert(0, "Const", np.ones(x_train.shape[0]))
    x_val.insert(0, "Const", np.ones(x_val.shape[0]))
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    if len(lamdas) == 0:
        # lamdas = np.arange(0, 1, 0.1)  # values between 0 and 1, step 0.1
        # lamdas = [i for i in range(-1200, 1200, 200)]
        lamdas = np.linspace(-15000, 3000, 30)
    val_err = list()
    train_err = list()
    w_list = list()
    for lamda in tqdm(lamdas):
        w = grad_desc(x_train, y_train, error, alpha, epsilion, lamda,
                      reg_type)
        w_list.append(w)
        val_err.append(test(w, x_val, y_val))
        train_err.append(error(w, x_train, y_train))
    lamdas = np.array(lamdas) / data.shape[0]
    return w_list, lamdas, val_err, train_err
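# Hedged sketch (assumption): grad_desc and test are defined elsewhere in
# the source and are not shown. A minimal L2-regularized batch gradient
# descent consistent with the call site above, assuming a squared-error
# objective, might look like this:
import numpy as np

def grad_desc_sketch(x, y, error, alpha, epsilion, lamda, reg_type="L2GD"):
    w = np.zeros(x.shape[1])
    prev_err = float("inf")
    while True:
        # Gradient of the mean squared error, plus the L2 penalty term
        grad = 2.0 * x.T @ (x @ w - y) / x.shape[0]
        if reg_type == "L2GD":
            grad = grad + 2.0 * lamda * w
        w = w - alpha * grad
        cur_err = error(w, x, y)
        if abs(prev_err - cur_err) < epsilion:  # converged
            return w
        prev_err = cur_err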
def change_group(groupnum, liname, operation, sessionname, CsvOutput,
                 ProcessOutput=True):
    liid, groupMappings = IQBotCommons.GetAllGroupsFromLI(sessionname, liname)
    AllGroups = []
    if "," in groupnum:
        AllGroups = groupnum.split(",")
    elif groupnum.upper() in ["ALL", "ALLGROUPS", "ALLGRPS", "EVERYTHING"]:
        AllGroups = list(groupMappings.keys())
    else:
        AllGroups.append(groupnum)
    for Grp in AllGroups:
        GrpId = groupMappings[Grp]
        URL = urllib.parse.urljoin(
            DataUtils.GetUrl(sessionname),
            get_LI_GROUP_CHANGE_STATE_URI(liid, GrpId, Grp))
        payload = GET_GROUP_CHANGE_STATE_BODY(operation)
        headers = {
            'Content-Type': "application/json",
            'cache-control': "no-cache",
            'X-Authorization': DataUtils.GetAuthToken(sessionname)
        }
        response = requests.request(LI_GROUP_CHANGE_STATE_REQ_TYPE, URL,
                                    data=payload, headers=headers)
        if ProcessOutput:
            isInError = IQBotGroupResponses.Process_Grp_State_Change_Response(
                response, Grp, liname, CsvOutput)
        else:
            return response
def try_params(model_gen, params, data, output_dir, base_fname, model_name,
               OBJECT, regression=False, nb_epoch=2,
               validation_data=(None, None)):
    def metrics_names(metrics):
        return sorted(metrics.keys())

    def metrics_to_list(metrics):
        return map(lambda key: metrics[key], metrics_names(metrics))

    summary_csv_fname = os.path.join(
        output_dir, base_fname + '_' + model_name + '_summary.csv')
    X_train, Y_train, X_test, Y_test = data
    nb_classes = params[1]
    to_write = []
    for param in params:
        param_base_fname = (base_fname + '_' + model_name + '_' +
                            '_'.join(map(str, param[2:])))
        model_fname = os.path.join(output_dir, param_base_fname + '.h5')
        csv_fname = os.path.join(output_dir, param_base_fname + '.csv')
        # Make, train, and evaluate the model
        model = model_gen(*param, regression=regression)
        if regression:
            train_time = run_model(model, data, nb_epoch=nb_epoch,
                                   validation_data=validation_data)
            metrics = evaluate_model_regression(model, X_test, Y_test)
        else:
            if nb_classes == 2:
                train_time, metrics = learn_and_eval(
                    model, data, validation_data=validation_data)
            else:
                train_time = run_model(model, data, nb_epoch=nb_epoch,
                                       validation_data=validation_data)
                metrics = evaluate_model_multiclass(model, X_test, Y_test)
        # Output predictions and save the model
        # Redo some computation to save my sanity
        conf1 = model.predict(X_train, batch_size=256, verbose=0)
        conf2 = model.predict(X_test, batch_size=256, verbose=0)
        conf = np.concatenate([conf1, conf2])
        if len(conf.shape) > 1:
            assert len(conf.shape) == 2
            assert conf.shape[1] <= 2
            if conf.shape[1] == 2:
                conf = conf[:, 1]
            else:
                conf = np.ravel(conf)
        DataUtils.confidences_to_csv(csv_fname, conf, OBJECT)
        model.save(model_fname)
        to_write.append(list(param[2:]) + [train_time] +
                        metrics_to_list(metrics))
        print param
        print train_time, metrics
        print
    print to_write
    # First two params don't need to be written out
    param_column_names = map(lambda i: 'param' + str(i),
                             xrange(len(params[0]) - 2))
    column_names = param_column_names + ['train_time'] + metrics_names(metrics)
    DataUtils.output_csv(summary_csv_fname, to_write, column_names)
def generic_api_call_handler_no_post(outputFormat, sessionname, get_res_func,
                                     res_data):
    url = DataUtils.GetUrl(sessionname)
    TOKEN = DataUtils.GetAuthToken(sessionname)
    CRVERSION = DataUtils.GetCRVersion(sessionname)
    IsVersionSupported, CallType, ApiUri, Headers, Body = get_res_func(
        CRVERSION, sessionname, TOKEN, res_data)
    if not IsVersionSupported:
        # Bug fix: the original logged the undefined name `crversion`
        logging.debug("Unsupported CR Version: {}".format(CRVERSION))
        print("Unsupported CR Version")
        exit(1)
    FULLURL = urllib.parse.urljoin(url, ApiUri)
    response = requests.request(method=CallType, url=FULLURL, data=Body,
                                headers=Headers)
    isAPICallOK = StdResponses.processAPIResponse(response)
    if not isAPICallOK:
        exit(99)
    if response.text != "":
        return json.loads(response.text)
    return response.status_code
def not_map_farsnet_kg_ontology():
    input_ontology_filename = DataUtils.join(Config.farsnet_ontology,
                                             Config.farsnet_ontology_filename)
    input_farsnet_map_ontology_filename = DataUtils.join(
        Config.farsnet_ontology, Config.farsnet_map_ontology_filename)
    output_farsnet_not_map_ontology_filename = DataUtils.join(
        Config.farsnet_ontology, Config.farsnet_not_map_ontology_filename)
    normalizer = hazm.Normalizer()
    flag_find = False
    item = 'word'
    with open(input_ontology_filename, 'r') as input_file_ontology, \
            open(output_farsnet_not_map_ontology_filename, 'a') as output_file:
        csv_reader_ontology = csv.reader(input_file_ontology)
        csv_writer = csv.writer(output_file)
        for line_ontology in csv_reader_ontology:
            if not flag_find:
                csv_writer.writerow([item])
                print(item)
            item = normalizer.normalize(line_ontology[0])
            flag_find = False
            with open(input_farsnet_map_ontology_filename, 'r') as input_file_map:
                csv_reader_graph = csv.reader(input_file_map)
                for line_map in csv_reader_graph:
                    if item == normalizer.normalize(line_map[1]):
                        flag_find = True
                        break
def find_farsnet_disambiguate_page():
    input_ambiguate_abstract_filename = join(
        Config.article_names_dir, Config.farsnet_ambiguate_abstract_filename)
    disambiguate_filenames = os.listdir(Config.extracted_disambiguations_dir)
    abstract_filenames = os.listdir(Config.extracted_texts_dir)
    output_disambiguate_wiki = join(Config.article_names_dir,
                                    Config.farsnet_disambiguate_wiki_filename)
    max_number = 0
    min_number = 1000
    with open(output_disambiguate_wiki, 'w') as output_file, \
            open(input_ambiguate_abstract_filename, 'r') as input_file:
        csv_writer = csv.writer(output_file)
        csv_reader = csv.reader(input_file)
        for line in csv_reader:
            for disambiguate_file in disambiguate_filenames:
                list_disambiguate = DataUtils.load_json(
                    Config.extracted_disambiguations_dir, disambiguate_file)
                for item_disambiguate in list_disambiguate:
                    if line[1] != item_disambiguate['title']:
                        continue
                    print(line[1] + ' found in disambiguation page.')
                    for abstract_file in abstract_filenames:
                        list_abstract = DataUtils.load_json(
                            Config.extracted_texts_dir, abstract_file)
                        for abstract_key in list_abstract:
                            if any(abstract_key == d
                                   for d in item_disambiguate['field']):
                                print('found abstract_key: ' + abstract_key)
                                sentence_snapshot = str(line[3]).replace(
                                    ',', ' ').replace('،', ' ') + ' '
                                gloss_sentence = str(line[4]).replace(
                                    ',', ' ').replace('،', ' ') + ' '
                                example = str(line[5]).replace(
                                    ',', ' ').replace('،', ' ') + ' '
                                sentence1 = (sentence_snapshot +
                                             gloss_sentence + example)
                                sentence2 = str(
                                    list_abstract[abstract_key]).replace(
                                    ',', ' ').replace('،', ' ').replace(
                                    '.', ' ')
                                diff = similar(sentence1, sentence2)
                                if diff > max_number:
                                    max_number = diff
                                if diff < min_number:
                                    min_number = diff
                                csv_writer.writerow([
                                    line[0], line[1], line[2], line[3],
                                    line[4], line[5], abstract_key,
                                    list_abstract[abstract_key], diff
                                ])
def realization_elm(elm, d, att, xor=False):
    np.random.shuffle(d)
    qt_training = int(0.8 * len(d))
    train_data, test_data = d[:qt_training], d[qt_training:]
    elm.train(train_data, att)
    accuracy = elm.test(test_data, att)
    if xor:
        ut.plot_decision_surface_elm(elm, test_data, att)
    return accuracy
def revert_previous_etags():
    try:
        previous_etags = DataUtils.load_json(
            Config.update_dir, Config.previous_wiki_rss_etags_filename)
        DataUtils.save_json(Config.update_dir,
                            Config.wiki_rss_etags_filename, previous_etags)
    except FileNotFoundError:
        DataUtils.save_json(Config.update_dir, Config.wiki_rss_etags_filename,
                            {dump_name: '' for dump_name in DUMP_NAMES})
def extract_bz2_dump(lang):
    input_filename = Config.latest_pages_articles_dump[lang]
    output_dir = Config.extracted_pages_articles_dir[lang]
    DataUtils.create_directory(output_dir, show_logging=True)
    if not os.listdir(output_dir):
        pages_counter = 0
        extracted_pages_filename, extracted_pages_file = \
            DataUtils.open_extracted_bz2_dump_file(pages_counter, output_dir,
                                                   lang)
        for page in DataUtils.get_wikipedia_pages(input_filename):
            extracted_pages_file.write(page)
            pages_counter += 1
            if pages_counter % Config.extracted_pages_per_file[lang] == 0:
                LogUtils.logging_pages_extraction(pages_counter,
                                                  extracted_pages_filename)
                DataUtils.close_extracted_bz2_dump_file(
                    extracted_pages_filename, extracted_pages_file)
                extracted_pages_filename, extracted_pages_file = \
                    DataUtils.open_extracted_bz2_dump_file(pages_counter,
                                                           output_dir, lang)
        LogUtils.logging_pages_extraction(pages_counter,
                                          extracted_pages_filename)
        DataUtils.close_extracted_bz2_dump_file(extracted_pages_filename,
                                                extracted_pages_file)
        logging.info('Page Extraction Finished! Number of All Extracted '
                     'Pages: %d' % pages_counter)
def run(model_name="squeezenet_by_pass"):
    # Input image dimensions - from data utils
    img_rows, img_cols = d_utils.IMAGE_SIZE, d_utils.IMAGE_SIZE
    num_classes = d_utils.NUM_CLASSES
    channels = d_utils.NUM_CHANNELS
    # Read and prepare data
    x_train, y_train, training_le = d_utils.load_training_images()
    x_test, y_test = d_utils.load_test_images(training_le)
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, channels)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, channels)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    # Convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    input_shape = (img_rows, img_cols, channels)
    # TensorBoard callback
    tbCallBack = keras.callbacks.TensorBoard(
        log_dir='./tensorboard/' + model_name, histogram_freq=0,
        write_graph=True, write_images=False)
    # Build the model
    model = squeeze_net_by_pass(num_classes, input_shape)
    # Compile the model
    model.compile(loss=params['loss'],
                  optimizer=keras.optimizers.SGD(lr=params['base_lr'],
                                                 decay=params['decay_lr'],
                                                 momentum=params['momentum']),
                  metrics=['accuracy'])
    # Train the model
    model.fit(x_train, y_train, batch_size=params['batch_size'],
              epochs=params['epochs'], verbose=1,
              validation_data=(x_test, y_test), callbacks=[tbCallBack])
    # Print results
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Results for ' + model_name + ':')
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
def bankData():
    # Renamed the local list so it no longer shadows the function name
    bank_rows = []
    for l_type in positions:
        l_data = DataUtils.get_frame_by_coord(
            "hello bank", pdf_link,
            DataUtils.compute_coord_from_object(positions, l_type),
            1).values.tolist()
        for line in l_data:
            bank_rows.append(
                ["HELLO BANK", "HOME LOAN", l_type, line[0], line[1]])
    return bank_rows
def table_extraction_by_page_title(title):
    # Fixed typo in the function name ("bye" -> "by")
    wiki_text, revision_id = get_wikitext_by_api(title)
    if wiki_text:
        tuples = list()
        wiki_text = wp.parse(wiki_text)
        for section in wiki_text.sections:
            for table in section.tables:
                new_tuples = build_tuples(table, title, section.title,
                                          revision_id)[0]
                tuples.extend(new_tuples)
        DataUtils.save_json(Config.wiki_table_tuples_dir, title, tuples)
def extract_page_ids_from_sql_dump():
    all_records = SqlUtils.get_sql_rows(Config.fawiki_latest_page_dump)
    page_ids = dict()
    for record in all_records:
        for columns in record:
            page_id, page_namespace, page_title = (columns[0], columns[1],
                                                   columns[2])
            page_ids[page_id] = page_title
    DataUtils.save_json(Config.extracted_page_ids_dir,
                        Config.extracted_page_ids_filename, page_ids)
def copy_result(new_version_dir):
    successful_copy = True
    destination_address = join(DESTINATION_DIR, new_version_dir)
    DataUtils.create_directory(destination_address)
    for directory in RESULT_DIRECTORIES:
        if not DataUtils.copy_directory(directory, destination_address):
            successful_copy = False
    # Only claim success in the log when every directory actually copied
    if successful_copy:
        logging.info('Result directories successfully copied.')
    DataUtils.create_symlink(destination_address,
                             join(DESTINATION_DIR, 'last'))
    return successful_copy
def main():
    # Configs
    config = configs.config()
    # Reading the selected data
    DataCSVFrame = pd.read_csv("DataFrame.csv",
                               usecols=["Image_Index", "Finding_Labels"],
                               index_col=False)
    labelsSet = set(DataCSVFrame["Finding_Labels"].values)
    # Dictionary with the label as key and the index in the set as value
    labelsDict = {}
    # Reverse dictionary, to map an index back to its corresponding label
    labelDictClassify = {}
    # Filling the dictionaries
    for index, label in enumerate(labelsSet):
        labelsDict[label] = index
        labelDictClassify[index] = label
    # Path where all the images are stored
    imgPath = config.getImagePath()
    # Creating the dataset
    xrayDataset = DC.XRayDataset(DataCSVFrame, imgPath, labelsDict)
    # # Getting the first image from the dataset
    # imgs, labs = xrayDataset.__getitem__([8307])
    # print(len(imgs))
    # print(imgs)
    # Get the device (cpu/gpu) to run the model on
    device = DU.getDevice()
    # Gets the ranges of training and test data
    training, testing = DU.splitTrainTest(xrayDataset, config)
    # Get the train and validation sets
    trainSets, valSets = DU.trainValSets(training, config)
    # Initialize the criterion, optimizer and model
    criterion, optimizer, model = NM.modelInit(device)
    # Get the batch size
    batchsize = config.getBatchSize()
    # Train the model
    trainedModel = NM.trainNetwork(device, xrayDataset, trainSets, valSets,
                                   config, model, criterion, optimizer,
                                   batchsize)
    # Save the model to be used for testing
    NM.save_model(trainedModel, config.getModelName())
def list_groups_from_li(liid, sessionname):
    # /IQBot/api/projects/b069d79d-5df0-43dc-824f-2c44474867ca/categories?offset=0&limit=50&sort=-index&trainingNotRequired=true
    URL = urllib.parse.urljoin(DataUtils.GetUrl(sessionname),
                               get_LI_LIST_GROUPS_FROM_LI_URI(liid))
    headers = {
        'Content-Type': "application/json",
        'cache-control': "no-cache",
        'X-Authorization': DataUtils.GetAuthToken(sessionname)
    }
    response = requests.request(LI_LIST_GROUPS_FROM_LI, URL, headers=headers)
    return response
def extract_wiki_links_from_sql_dump(page_ids, output_directory,
                                     output_filename):
    wiki_links = defaultdict(list)
    all_records = SqlUtils.get_sql_rows(Config.fawiki_latest_page_links_dump)
    for record in all_records:
        for columns in record:
            pl_from, pl_title = columns[0], columns[2]
            if pl_from in page_ids:
                pl_from = page_ids[pl_from]
                wiki_links[pl_from].append(pl_title)
    DataUtils.save_json(output_directory, output_filename, wiki_links)
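# Hedged usage sketch: wiring this to the page-id extraction elsewhere in
# this codebase. The output directory and filename arguments below are
# illustrative placeholders, not names from the source.
#
# page_ids = DataUtils.load_json(Config.extracted_page_ids_dir,
#                                Config.extracted_page_ids_filename)
# extract_wiki_links_from_sql_dump(page_ids, 'extracted_wiki_links',
#                                  'wiki_links')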
def change_group_status(outputFormat, sessionname, LiID, GroupNum, NewStatus):
    url = DataUtils.GetUrl(sessionname)
    TOKEN = DataUtils.GetAuthToken(sessionname)
    CRVERSION = DataUtils.GetCRVersion(sessionname)
    liid, groupMappings = IQBotCommons.GetAllGroupsFromLI(sessionname, LiID)
    AllGroups = []
    if "," in GroupNum:
        AllGroups = GroupNum.split(",")
    elif GroupNum.upper() in ["ALL", "ALLGROUPS", "ALLGRPS", "EVERYTHING"]:
        AllGroups = list(groupMappings.keys())
    else:
        AllGroups.append(GroupNum)
    AllRows = []
    for Grp in AllGroups:
        GrpId = groupMappings[Grp]
        JsonData = {
            "GrpId": GrpId,
            "GroupName": Grp,
            "LiId": LiID,
            "NewStatus": NewStatus
        }
        json_object = StdAPIUtils.generic_api_call_handler_no_post(
            outputFormat, sessionname, get_group_update_resources, JsonData)
        Success = False
        CurrentStatus = 'no change'
        GroupNumber = Grp
        if 'success' in json_object:
            Success = json_object['success']
        if 'data' in json_object:
            CurrentStatus = json_object['data']
        aRow = {
            'GroupNumber': GroupNumber,
            'CurrentState': CurrentStatus,
            'GroupID': GrpId,
            'UpdateSuccess': Success
        }
        AllRows.append(aRow)
    FinalDF = pd.DataFrame(AllRows)
    if outputFormat == "DF":
        print(FinalDF)
    elif outputFormat == "CSV":
        print(FinalDF.to_csv(index=False))
    else:
        print(FinalDF.to_json())
def StDistandAzi(traces, hyp, dir):
    '''
    Given a list of station ids, a tuple with the (lat, lon, depth) of the
    hypocentre, and the directory with the XML metafiles, return a
    dictionary with the distance and the azimuth of each station.
    '''
    Stdistribution = {}
    for trid in traces:
        metafile = dir + "META." + trid + ".xml"
        META = DU.getMetadataFromXML(metafile)[trid]
        lat = META['latitude']
        lon = META['longitude']
        dist = locations2degrees(hyp[0], hyp[1], lat, lon)
        azi = -np.pi/180.*gps2DistAzimuth(lat, lon, hyp[0], hyp[1])[2]
        Stdistribution[trid] = {'azi': azi, 'dist': dist}
    #~ fig = plt.figure()
    #~ plt.rc('grid', color='#316931', linewidth=1, linestyle='-')
    #~ plt.show()
    return Stdistribution
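# Hedged usage sketch: the station ids, hypocentre and metadata directory
# below are illustrative (the 2010 Chile hypocentre values appear later in
# this codebase).
#
# stdist = StDistandAzi(["IU.LVC.00.LHZ", "II.NNA.00.LHZ"],
#                       (-35.91, -72.73, 35.0), "DATA/")
# print stdist["IU.LVC.00.LHZ"]["dist"], stdist["IU.LVC.00.LHZ"]["azi"]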
def copyFileIfRelevant(cardNumber):
    # Get the prices
    priceList = DataUtils.parseIntoPriceOnlyList(cardNumber, False)
    if priceList is None:
        return 0
    # Copy the file if the current price is at least one dollar
    # (prices are stored in cents)
    curPrice = priceList[len(priceList) - 1]
    if curPrice >= 100:
        shutil.copyfile(getFileNameFromCardNumber(cardNumber, True),
                        getFileNameFromCardNumber(cardNumber, False))
        return 1
    return 0
def getFileNameFromCardNumber(c, src):
    cardNumber = str(c)
    # Zeros needed to pad the card number
    mm0s = DataUtils.missing0s(cardNumber)
    # Build the path to the card's data file
    filename = 'DatabaseDownloadTools/'
    if src:
        filename += 'cardPriceData'
    else:
        filename += 'RelevantCardPriceData'
    filename += '/CardData' + mm0s + cardNumber + ".txt"
    return filename
def getFileNameFromCardNumber(c, useRelevant):
    cardNumber = str(c)
    # Zeros needed to pad the card number
    mm0s = DataUtils.missing0s(cardNumber)
    # Build the path to the card's data file
    filename = "DatabaseDownloadTools/"
    if useRelevant:
        filename += "RelevantCardPriceData"
    else:
        filename += "cardPriceData"
    filename += "/CardData" + mm0s + cardNumber + ".txt"
    return filename
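# Hedged sketch (assumption): DataUtils.missing0s is not shown here. From
# the way its result is concatenated into "CardData" + mm0s + cardNumber,
# it most likely returns the zeros needed to left-pad the card number to a
# fixed width; the width of 5 below is a guess.
def missing0s_sketch(card_number, width=5):
    return "0" * max(0, width - len(card_number))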
def main(argv=sys.argv):
    global eplat, eplon, epdepth, orig
    GFdir = "/home/roberto/data/GFS/"
    beta = 4.e3   # m/s
    rho = 3.e3    # kg/m^3
    mu = rho*beta*beta
    mu = 40e9
    Lbdm0min = 1e-26*np.array([125.])
    Lbdsmooth = 1e-26*np.array([100.])
    #~ Lbdm0min = 1e-26*np.linspace(60.,500,40)
    #~ Lbdsmooth = 1e-26*np.linspace(60.,500,40)#*0.5
    corners = 4.
    fmin = 0.001
    fmax = 0.005
    ### Data from Chilean 2010 EQ (Same as W phase inv.)
    strike = 18.
    dip = 18.
    rake = 104.   # 109.
    #rake = 45.
    rakeA = rake + 45.
    rakeB = rake - 45.
    ####################
    nsx = 21
    nsy = 11
    Min_h = 10.
    flen = 600.   # Fault's length [km] along strike
    fwid = 300.   # Fault's width [km] along dip
    sflen = flen/float(nsx)
    sfwid = fwid/float(nsy)
    swp = [1, 0, 2]
    nsf = nsx*nsy
    ###################
    t_h = 10.
    MISFIT = np.array([])
    #RUPVEL = np.arange(1.0, 5.0, 0.05)
    RupVel = 2.1    # Best fit
    #RupVel = 2.25  # From Lay et al.
    #for RupVel in RUPVEL:
    print "****************************"
    print RupVel
    print "****************************"
    NP = [strike, dip, rake]
    NPA = [strike, dip, rakeA]
    NPB = [strike, dip, rakeB]
    M = np.array(NodalPlanetoMT(NP))
    MA = np.array(NodalPlanetoMT(NPA))
    MB = np.array(NodalPlanetoMT(NPB))
    Mp = np.sum(M**2)/np.sqrt(2)
    #############
    # Loading req file and EQ parameters
    parameters = {}
    with open(argv[1], 'r') as file:
        for line in file:
            line = line.split()
            key = line[0]
            val = line[1:]
            parameters[key] = val
    #~ cmteplat = float(parameters['eplat'][0])
    #~ cmteplon = float(parameters['eplon'][0])
    #~ cmtepdepth = float(parameters['epdepth'][0])
    orig = UTCDateTime(parameters['origin_time'][0])
    #### Hypocentre from
    ### http://earthquake.usgs.gov/earthquakes/eqinthenews/2010/us2010tfan/
    cmteplat = -35.91    #-35.85 #-36.03 #-35.83
    cmteplon = -72.73    #-72.72 #-72.83 #-72.67
    cmtepdepth = 35.
    eq_hyp = (cmteplat, cmteplon, cmtepdepth)
    ############
    grid, sblt = fault_grid('CL-2010', cmteplat, cmteplon, cmtepdepth, 0,
                            Min_h, strike, dip, rake, flen, fwid, nsx, nsy,
                            Verbose=False, ffi_io=True, gmt_io=True)
    print ('CL-2010', cmteplat, cmteplon, cmtepdepth, 0, Min_h,
           strike, dip, rake, flen, fwid, nsx, nsy)
    print grid[0][1]
    #sys.exit()
    #############
    # Loading files and setting dirs:
    inputfile = os.path.abspath(argv[1])
    if not os.path.exists(inputfile):
        print inputfile, "does not exist."
        exit()
    workdir = "/".join(inputfile.split("/")[:-1])
    basename = inputfile.split("/")[-1][:-4]
    if workdir[-1] != "/":
        workdir += "/"
    try:
        os.mkdir(workdir + "WPinv")
    except OSError:
        pass  # print "Directory WPtraces already exists. Skipping"
    trfile = open(workdir + "goodtraces.dat")
    trlist = []
    # Loading good traces files:
    while 1:
        line = trfile.readline().rstrip('\r\n')
        if not line:
            break
        trlist.append(line.split()[0])
    trfile.close()
    #############
    # Reading traces:
    st = read(workdir + "WPtraces/" + basename + ".decov.trim.mseed")
    ##########################################################################
    ###### Determining the sf closest to the hypocentre:
    min_Dist_hyp_subf = flen*fwid
    for subf in range(nsf):
        sblat = grid[subf][1]
        sblon = grid[subf][0]
        sbdepth = grid[subf][2]
        sf_hyp = (sblat, sblon, sbdepth)
        Dist_hyp_subf = hypo2dist(eq_hyp, sf_hyp)
        if Dist_hyp_subf < min_Dist_hyp_subf:
            min_Dist_hyp_subf = Dist_hyp_subf
            min_sb_hyp = sf_hyp
            hyp_subf = subf
    print hyp_subf, min_sb_hyp, min_Dist_hyp_subf
    #### Determining trimming times:
    test_tr = read(GFdir + "H003.5/PP/GF.0001.SY.LHZ.SAC")[0]
    t0 = test_tr.stats.starttime
    TrimmingTimes = {}   # Min. distance from the fault to each station.
    A = 0
    for trid in trlist:
        tr = st.select(id=trid)[0]
        metafile = workdir + "DATA/" + "META." + tr.id + ".xml"
        META = DU.getMetadataFromXML(metafile)[tr.id]
        stlat = META['latitude']
        stlon = META['longitude']
        dist = locations2degrees(min_sb_hyp[0], min_sb_hyp[1], stlat, stlon)
        parrivaltime = getTravelTimes(dist, min_sb_hyp[2])[0]['time']
        ta = t0 + parrivaltime
        tb = ta + round(15.*dist)
        TrimmingTimes[trid] = (ta, tb)
    ##########################################################################
    DIST = []
    # Ordering the stations in terms of distance
    for trid in trlist:
        metafile = workdir + "DATA/" + "META." + trid + ".xml"
        META = DU.getMetadataFromXML(metafile)[trid]
        lat = META['latitude']
        lon = META['longitude']
        trdist = locations2degrees(cmteplat, cmteplon, lat, lon)
        DIST.append(trdist)
    DistIndex = lstargsort(DIST)
    if len(argv) == 3:
        trlist = [argv[2]]
        OneStation = True
    else:
        trlist = [trlist[i] for i in DistIndex]
        OneStation = False
    #####
    client = Client()
    ObservedDisp = np.array([])
    gridlat = []
    gridlon = []
    griddepth = []
    sbarea = []
    mindist = flen*fwid   # min distance hyp-subfault
    ########## Loop for each subfault
    for subf in range(nsf):
        print "**********"
        print subf
        eplat = grid[subf][1]
        eplon = grid[subf][0]
        epdepth = grid[subf][2]
        ## Storing the subfault's location centered in the hypocentre
        gridlat.append(eplat - cmteplat)
        gridlon.append(eplon - cmteplon)
        griddepth.append(epdepth)
        strike = grid[subf][3]   #+ 360.
        dip = grid[subf][4]
        rake = grid[subf][5]
        NP = [strike, dip, rake]
        M = np.array(NodalPlanetoMT(NP))
        # Calculating the time delay:
        sf_hyp = (eplat, eplon, epdepth)
        Dist_ep_subf = hypo2dist(eq_hyp, sf_hyp)
        t_d = round(Dist_ep_subf/RupVel)   #-59.
        print eplat, eplon, epdepth
        #t_d = 0.
        # Determining depth dir:
        depth = []
        depthdir = []
        for file in os.listdir(GFdir):
            if file[-2:] == ".5":
                depthdir.append(file)
                depth.append(float(file[1:-2]))
        BestDirIndex = np.argsort(abs(epdepth - np.array(depth)))[0]
        hdir = GFdir + depthdir[BestDirIndex] + "/"
        # hdir is the absolute path to the closest depth.
        SYN = np.array([])
        SYNA = np.array([])
        SYNB = np.array([])
        # Main loop:
        for trid in trlist:
            tr = st.select(id=trid)[0]
            metafile = workdir + "DATA/" + "META." + tr.id + ".xml"
            META = DU.getMetadataFromXML(metafile)[tr.id]
            lat = META['latitude']
            lon = META['longitude']
            trPPsy, trRRsy, trRTsy, trTTsy = GFSelectZ(lat, lon, hdir)
            tr.stats.delta = trPPsy.stats.delta
            azi = -np.pi/180.*gps2DistAzimuth(lat, lon, eplat, eplon)[2]
            trROT = MTrotationZ(azi, trPPsy, trRRsy, trRTsy, trTTsy)
            # Triangle
            dt = trROT[0].stats.delta
            trianglen = 2.*t_h/dt - 1.
            window = triang(trianglen)
            window /= np.sum(window)
            #window = np.array([1.])
            FirstValid = int(trianglen/2.) + 1
            dist = locations2degrees(eplat, eplon, lat, lon)
            parrivaltime = getTravelTimes(dist, epdepth)[0]['time']
            t1 = TrimmingTimes[trid][0] - t_d
            t2 = TrimmingTimes[trid][1] - t_d
            #~ t1 = trROT[0].stats.starttime + parrivaltime - t_d
            #~ t2 = t1 + round(MinDist[tr.id]*15.)
            N = len(trROT[0])
            for trR in trROT:
                trR.data *= 10.**-21   ## To get M in Nm
                trR.data -= trR.data[0]
                AUX1 = len(trR)
                trR.data = convolve(trR.data, window, mode='valid')
                AUX2 = len(trR)
                mean = np.mean(np.hstack((trR.data[0]*np.ones(FirstValid),
                               trR.data[:60./trR.stats.delta*1. - FirstValid + 1])))
                #mean = np.mean(trR.data[:60])
                trR.data -= mean
                trR.data = bp.bandpassfilter(trR.data, len(trR),
                                             trR.stats.delta,
                                             corners, 1, fmin, fmax)
                t_l = dt*0.5*(AUX1 - AUX2)
                # We lost t_h due to the convolution:
                trR.trim(t1 - t_l, t2 - t_l, pad=True, fill_value=trR.data[0])
            #~ for trR in trROT:
            #~     trR.data *= 10.**-23   ## To get M in Nm
            #~     trR.data -= trR.data[0]
            #~     trR.data = convolve(trR.data, window, mode='same')
            #~     # mean = np.mean(np.hstack((trR.data[0]*np.ones(FirstValid),
            #~     #        trR.data[:60./trR.stats.delta*1.-FirstValid+1])))
            #~     mean = np.mean(trR.data[:60])
            #~     trR.data -= mean
            #~     trR.data = bp.bandpassfilter(trR.data, len(trR),
            #~                                  trR.stats.delta,
            #~                                  corners, 1, fmin, fmax)
            #~     trR.trim(t1, t2, pad=True, fill_value=trR.data[0])
            nmin = min(len(tr.data), len(trROT[0].data))
            tr.data = tr.data[:nmin]
            for trR in trROT:
                trR.data = trR.data[:nmin]
            #############
            trROT = np.array(trROT)
            syn = np.dot(trROT.T, M)
            synA = np.dot(trROT.T, MA)
            synB = np.dot(trROT.T, MB)
            SYN = np.append(SYN, syn)
            SYNA = np.append(SYNA, synA)
            SYNB = np.append(SYNB, synB)
            if subf == 0:
                ObservedDisp = np.append(ObservedDisp, tr.data, 0)
        sbarea.append(grid[subf][6])
        print np.shape(A), np.shape(np.array([SYN]))
        if subf == 0:
            A = np.array([SYN])
            AA = np.array([SYNA])
            AB = np.array([SYNB])
        else:
            A = np.append(A, np.array([SYN]), 0)
            AA = np.append(AA, np.array([SYNA]), 0)
            AB = np.append(AB, np.array([SYNB]), 0)
    # Full matrix with the two rake components
    AC = np.vstack((AA, AB))
    #MISFIT = np.array([])
    ########## Stabilizing the solution:
    #### Moment minimization:
    #~ constraintD = np.zeros(nsf)
    #~ ObservedDispcons = np.append(ObservedDisp, constraintD)
    #~ for lbd in Lbd:
    #~     constraintF = lbd*np.eye(nsf, nsf)
    #~     Acons = np.append(A, constraintF, 1)
    #~     print np.shape(Acons.T), np.shape(ObservedDispcons)
    #~     R = nnls(Acons.T, ObservedDispcons)
    #~     M = R[0]
    #~     #M = np.zeros(nsf)
    #~     #M[::2] = 1
    #~     fit = np.dot(A.T, M)
    #~     misfit = 100.*np.sum(np.abs(fit - ObservedDisp))\
    #~              /np.sum(np.abs(ObservedDisp))
    #~     MISFIT = np.append(MISFIT, misfit)
    #~ plt.figure()
    #~ plt.plot(Lbd, MISFIT)
    ###########################################
    #~ ### Smoothing:
    #~ constraintF_base = SmoothMatrix(nsx, nsy)
    #~ constraintD = np.zeros(np.shape(constraintF_base)[0])
    #~ ObservedDispcons = np.append(ObservedDisp, constraintD)
    #~ for lbd in Lbd:
    #~     constraintF = lbd*constraintF_base
    #~     Acons = np.append(A, constraintF.T, 1)
    #~     #print np.shape(Acons.T), np.shape(ObservedDispcons)
    #~     R = nnls(Acons.T, ObservedDispcons)
    #~     M = R[0]
    #~     fit = np.dot(A.T, M)
    #~     misfit = 100.*np.sum(np.abs(fit - ObservedDisp))\
    #~              /np.sum(np.abs(ObservedDisp))
    #~     print lbd, misfit
    #~     MISFIT = np.append(MISFIT, misfit)
    ###########################################
    #~ ##### Moment minimization (including rake projections):
    #~ constraintD = np.zeros(2*nsf)
    #~ ObservedDispcons = np.append(ObservedDisp, constraintD)
    #~ for lbd in Lbd:
    #~     constraintF = lbd*np.eye(2*nsf, 2*nsf)
    #~     ACcons = np.append(AC, constraintF, 1)
    #~     print np.shape(ACcons.T), np.shape(ObservedDispcons)
    #~     R = nnls(ACcons.T, ObservedDispcons)
    #~     M = R[0]
    #~     fit = np.dot(AC.T, M)
    #~     misfit = 100.*np.sum(np.abs(fit - ObservedDisp))\
    #~              /np.sum(np.abs(ObservedDisp))
    #~     MISFIT = np.append(MISFIT, misfit)
    #~ M = np.sqrt(M[:nsf]**2 + M[nsf:]**2)
    ##############################################
    ### Smoothing (including rake projections):
    #~ constraintF_base = SmoothMatrix(nsx, nsy)
    #~ Nbase = np.shape(constraintF_base)[0]
    #~ constraintD = np.zeros(2*Nbase)
    #~ constraintF_base_big = np.zeros((2*Nbase, 2*nsf))
    #~ constraintF_base_big[:Nbase, :nsf] = constraintF_base
    #~ constraintF_base_big[Nbase:, nsf:] = constraintF_base
    #~ ObservedDispcons = np.append(ObservedDisp, constraintD)
    #~ for lbd in Lbd:
    #~     constraintF = lbd*constraintF_base_big
    #~     ACcons = np.append(AC, constraintF.T, 1)
    #~     #print np.shape(Acons.T), np.shape(ObservedDispcons)
    #~     R = nnls(ACcons.T, ObservedDispcons)
    #~     M = R[0]
    #~     fit = np.dot(AC.T, M)
    #~     misfit = 100.*np.sum(np.abs(fit - ObservedDisp))\
    #~              /np.sum(np.abs(ObservedDisp))
    #~     print lbd, misfit
    #~     MISFIT = np.append(MISFIT, misfit)
    #~ M = np.sqrt(M[:nsf]**2 + M[nsf:]**2)
    ###########################################
    #~ ##### Moment minimization and smoothing
    #~ #### (including rake projections):
    #~ mom0 = []
    #~ constraintF_base = SmoothMatrix(nsx, nsy)
    #~ Nbase = np.shape(constraintF_base)[0]
    #~ constraintDsmoo = np.zeros(2*Nbase)
    #~ constraintDmin = np.zeros(2*nsf)
    #~ constraintF_base_big = np.zeros((2*Nbase, 2*nsf))
    #~ constraintF_base_big[:Nbase, :nsf] = constraintF_base
    #~ constraintF_base_big[Nbase:, nsf:] = constraintF_base
    #~ ObservedDispcons = np.concatenate((ObservedDisp,
    #~                                    constraintDmin,
    #~                                    constraintDsmoo))
    #~ for lbdm0 in Lbdm0min:
    #~     constraintFmin = lbdm0*np.eye(2*nsf, 2*nsf)
    #~     for lbdsm in Lbdsmooth:
    #~         constraintFsmoo = lbdsm*constraintF_base_big
    #~         ACcons = np.hstack((AC, constraintFmin, constraintFsmoo.T))
    #~         print lbdm0, lbdsm
    #~         R = nnls(ACcons.T, ObservedDispcons)
    #~         M = R[0]
    #~         fit = np.dot(AC.T, M)
    #~         misfit = 100.*np.sum(np.abs(fit - ObservedDisp))\
    #~                  /np.sum(np.abs(ObservedDisp))
    #~         MISFIT = np.append(MISFIT, misfit)
    #~         MA = M[:nsf]
    #~         MB = M[nsf:]
    #~         M = np.sqrt(MA**2 + MB**2)
    #~         mom0.append(np.sum(M))
    ##############################################
    # Rotation to the rake's conventional angle:
    #MB, MA = Rot2D(MB, MA, -rakeB)
    print np.shape(M), np.shape(A.T)
    R = nnls(A.T, ObservedDisp)
    M = R[0]
    #~ M = np.zeros(nsf)
    #~ M[::2] = 1
    fit = np.dot(A.T, M)
    MA = M
    MB = M
    np.save("RealSol", M)
    nm0 = np.size(Lbdm0min)
    nsmth = np.size(Lbdsmooth)
    #~ plt.figure()
    #~ plt.pcolor(1./Lbdsmooth, 1./Lbdm0min, MISFIT.reshape(nm0, nsmth))
    #~ plt.xlabel(r'$1/ \lambda_{2}$', fontsize=24)
    #~ plt.ylabel(r'$1/ \lambda_{1}$', fontsize=24)
    #~ plt.ylim((1./Lbdm0min).min(), (1./Lbdm0min).max())
    #~ plt.ylim((1./Lbdsmooth).min(), (1./Lbdsmooth).max())
    #~ cbar = plt.colorbar()
    #~ cbar.set_label("Misfit %")
    #~ print np.shape(Lbdm0min), np.shape(mom0)
    #~ plt.figure()
    #~ CS = plt.contour(1./Lbdsmooth, 1./Lbdm0min, MISFIT.reshape(nm0, nsmth))
    #~ plt.xlabel(r'$1/ \lambda_{2}$', fontsize=24)
    #~ plt.ylabel(r'$1/ \lambda_{1}$', fontsize=24)
    #~ plt.clabel(CS, inline=1, fontsize=10)
    #~ plt.title('Misfit')
    #~ plt.figure()
    #~ plt.plot(1./Lbdm0min, MISFIT)
    #~ plt.xlabel(r'$1/ \lambda_{2}$', fontsize=24)
    #~ plt.ylabel("Misfit %")
    #~ plt.figure()
    #~ plt.plot(Lbdm0min, mom0)
    #~ plt.ylabel(r'$M_0\, [Nm]$', fontsize=24)
    #~ plt.xlabel(r'$\lambda_{M0}$', fontsize=24)
    misfit = 100.*np.sum(np.abs(fit - ObservedDisp))\
             /np.sum(np.abs(ObservedDisp))
    print "Residual: ", 1000.*R[1]
    print misfit
    #SLIP = M*Mp/mu/(1.e6*np.array(sbarea))
    sbarea = sflen*sfwid
    SLIP = M/(mu*1.e6*sbarea)
    SLIP = SLIP.reshape(nsx, nsy).T[::-1]
    moment = M.reshape(nsx, nsy).T[::-1]
    plt.figure(figsize=(13, 5))
    plt.plot(fit, 'b', label="Fit")
    plt.plot(ObservedDisp, 'r', label="Observed")
    plt.xlabel("Time [s]")
    plt.ylabel("Displacement [m]")
    plt.legend()
    np.set_printoptions(linewidth=1000, precision=3)
    print "***********"
    print sbarea
    print SLIP
    print np.mean(SLIP)
    print "Moment:"
    print np.sum(M)
    ### SLIPS distribution (as the synthetics):
    SLIPS = M.reshape(nsx, nsy).T
    SLIPS /= mu*1.e6*sbarea
    #~ ######### Plotting slip distribution:
    # we are going to reflect the y axis later, so:
    hypsbloc = [hyp_subf / nsy, -(hyp_subf % nsy) - 2]
    # Creating the strike and dip axis:
    StrikeAx = np.linspace(0, flen, nsx+1)
    DipAx = np.linspace(0, fwid, nsy+1)
    DepthAx = DipAx*np.sin(np.pi/180.*dip) + Min_h
    print DepthAx
    hlstrike = StrikeAx[hypsbloc[0]] + sflen*0.5
    # we are going to reflect the axis later, so:
    hldip = DipAx[hypsbloc[1]] + sfwid*0.5
    hldepth = DepthAx[hypsbloc[1]] + sfwid*0.5*np.sin(np.pi/180.*dip)
    StrikeAx = StrikeAx - hlstrike
    DipAx = DipAx - hldip
    XX, YY = np.meshgrid(StrikeAx, DepthAx)
    XX, ZZ = np.meshgrid(StrikeAx, DipAx)
    ###### Plot: (Old colormap: "gist_rainbow_r")
    plt.figure(figsize=(13, 6))
    ax = host_subplot(111)
    im = ax.pcolor(XX, YY, SLIPS, cmap="jet")
    ax.set_ylabel('Depth [km]')
    ax.set_ylim(DepthAx[-1], DepthAx[0])
    # Creating a twin plot
    ax2 = ax.twinx()
    im2 = ax2.pcolor(XX, ZZ, SLIPS[::-1, :], cmap="jet")
    ax2.set_ylabel('Distance along the dip [km]')
    ax2.set_xlabel('Distance along the strike [km]')
    ax2.set_ylim(DipAx[0], DipAx[-1])
    ax2.set_xlim(StrikeAx[0], StrikeAx[-1])
    ax.axis["bottom"].major_ticklabels.set_visible(False)
    ax2.axis["bottom"].major_ticklabels.set_visible(False)
    ax2.axis["top"].set_visible(True)
    ax2.axis["top"].label.set_visible(True)
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)
    cb = plt.colorbar(im, cax=cax, orientation="horizontal")
    cb.set_label("Slip [m]")
    ax2.plot([0], [0], '*', ms=225./(nsy+4))
    ax2.set_xticks(ax2.get_xticks()[1:-1])
    #~ ### Rake plot:
    plt.figure(figsize=(13, 6))
    fig = host_subplot(111)
    XXq, ZZq = np.meshgrid(StrikeAx[:-1] + sflen, DipAx[:-1] + sfwid)
    Q = plt.quiver(XXq, ZZq,
                   MB.reshape(nsx, nsy).T[::-1, :]/(mu*1.e6*sbarea),
                   MA.reshape(nsx, nsy).T[::-1, :]/(mu*1.e6*sbarea),
                   SLIPS[::-1, :],
                   units='xy', scale=0.5, linewidths=(2,),
                   edgecolors=('k'), headaxislength=5)
    fig.set_ylim([ZZq.min() - 80, ZZq.max() + 80])
    fig.set_xlim([XXq.min() - 20, XXq.max() + 20])
    fig.set_ylabel('Distance along dip [km]')
    fig.set_xlabel('Distance along the strike [km]')
    fig2 = fig.twinx()
    fig2.set_xlabel('Distance along the strike [km]')
    fig.axis["bottom"].major_ticklabels.set_visible(False)
    fig.axis["bottom"].label.set_visible(False)
    fig2.axis["top"].set_visible(True)
    fig2.axis["top"].label.set_visible(True)
    fig2.axis["right"].major_ticklabels.set_visible(False)
    divider = make_axes_locatable(fig)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)
    cb = plt.colorbar(im, cax=cax, orientation="horizontal")
    cb.set_label("Slip [m]")
    plt.show()
    #############
    #~ print np.shape(MISFIT), np.shape(RUPVEL)
    #~ plt.figure()
    #~ plt.plot(RUPVEL, MISFIT)
    #~ plt.xlabel("Rupture Velocity [km/s]")
    #~ plt.ylabel("Misfit %")
    #~ plt.show()
    print np.shape(MB.reshape(nsx, nsy).T)
    print np.shape(ZZ)
def main(argv=sys.argv):
    # Earth's parameters
    #~ beta = 4.e3   # m/s
    #~ rho = 3.e3    # kg/m^3
    #~ mu = rho*beta*beta
    PLotSt = ["IU.TRQA.00.LHZ",
              "IU.LVC.00.LHZ",
              "II.NNA.00.LHZ",
              "IU.RAR.00.LHZ"]
    #PlotSubf = [143, 133, 123, 113, 103, 93,
    #            83, 73, 63, 53]
    PlotSubf = [6, 3]
    # Set rup_vel = 0 to have a point source solution
    RupVel = 2.1   # Chilean eq from Lay et al
    t_h = 10.      # Half duration for each sf
    noiselevel = 0.0   # L1 norm level of noise
    mu = 40e9
    # W-Phase filter
    corners = 4.
    fmin = 0.001
    fmax = 0.005
    ### Data from Chilean 2010 EQ (Same as W phase inv.)
    strike = 18.
    dip = 18.
    rake = 104.   # 109.
    rakeA = rake + 45.
    rakeB = rake - 45.
    ### Fault's grid parameters
    nsx = 21   # Number of sf along strike
    nsy = 11   # Number of sf along dip
    flen = 600.   # Fault's length [km] along strike
    fwid = 300.   # Fault's width [km] along dip
    direc = 0     # Directivity, 0 = bilateral
    Min_h = 10.   # Min depth of the fault
    ### Derived parameters:
    nsf = nsx*nsy
    sflen = flen/float(nsx)
    sfwid = fwid/float(nsy)
    swp = [1, 0, 2]   # useful to swap (lat, lon, depth)
    mindist = flen*fwid   # minimum dist to the hypocentre (initializing)
    ### Chessboard
    #weight = np.load("RealSol.npy")
    weight = np.zeros(nsf)
    weight[::2] = 1
    #weight[::2] = 1
    #~ weight[10] = 15
    #~ weight[5001] = 10
    #~ weight[3201] = 2
    ## Setting dirs and reading files.
    GFdir = "/home/roberto/data/GFS/"
    workdir = os.path.abspath(".") + "/"
    datadir = workdir + "DATA/"
    tracesfilename = workdir + "goodtraces.dat"
    tracesdir = workdir + "WPtraces/"
    try:
        reqfilename = glob.glob(workdir + '*.syn.req')[0]
    except IndexError:
        print "There is no *.syn.req file in the dir"
        sys.exit()
    basename = reqfilename.split("/")[-1][:-4]
    if not os.path.exists(tracesfilename):
        print tracesfilename, "does not exist."
        exit()
    if not os.path.exists(datadir):
        os.makedirs(datadir)
    if not os.path.exists(tracesdir):
        os.makedirs(tracesdir)
    tracesfile = open(tracesfilename)
    reqfile = open(reqfilename)
    trlist = readtraces(tracesfile)
    eqdata = readreq(reqfile)
    tracesfile.close()
    reqfile.close()
    #### Hypocentre from
    ### http://earthquake.usgs.gov/earthquakes/eqinthenews/2010/us2010tfan/
    cmteplat = -35.91    #-35.85 #-36.03 #-35.83
    cmteplon = -72.73    #-72.72 #-72.83 #-72.67
    cmtepdepth = 35.
    eq_hyp = (cmteplat, cmteplon, cmtepdepth)
    ############
    # Defining the sf system
    grid, sblt = fault_grid('CL-2010', cmteplat, cmteplon, cmtepdepth,
                            direc, Min_h, strike, dip, rake, flen, fwid,
                            nsx, nsy, Verbose=False, ffi_io=True, gmt_io=True)
    print ('CL-2010', cmteplat, cmteplon, cmtepdepth, direc, Min_h,
           strike, dip, rake, flen, fwid, nsx, nsy)
    print grid[0][1]
    #sys.exit()
    # This calculation is inside of the loop
    #~ NP = [strike, dip, rake]
    #~ M = np.array(NodalPlanetoMT(NP))
    #~ Mp = np.sum(M**2)/np.sqrt(2)
    ##########################################################################
    ###### Determining the sf closest to the hypocentre:
    min_Dist_hyp_subf = flen*fwid
    for subf in range(nsf):
        sblat = grid[subf][1]
        sblon = grid[subf][0]
        sbdepth = grid[subf][2]
        sf_hyp = (sblat, sblon, sbdepth)
        Dist_hyp_subf = hypo2dist(eq_hyp, sf_hyp)
        if Dist_hyp_subf < min_Dist_hyp_subf:
            min_Dist_hyp_subf = Dist_hyp_subf
            min_sb_hyp = sf_hyp
            hyp_subf = subf
    #### Determining trimming times:
    test_tr = read(GFdir + "H003.5/PP/GF.0001.SY.LHZ.SAC")[0]
    t0 = test_tr.stats.starttime
    TrimmingTimes = {}   # Min. distance from the fault to each station.
    A = 0
    for trid in trlist:
        metafile = workdir + "DATA/" + "META." + trid + ".xml"
        META = DU.getMetadataFromXML(metafile)[trid]
        stlat = META['latitude']
        stlon = META['longitude']
        dist = locations2degrees(min_sb_hyp[0], min_sb_hyp[1], stlat, stlon)
        parrivaltime = getTravelTimes(dist, min_sb_hyp[2])[0]['time']
        ta = t0 + parrivaltime
        tb = ta + round(15.*dist)
        TrimmingTimes[trid] = (ta, tb)
    ###########################
    DIST = []
    # Ordering the stations in terms of distance
    for trid in trlist:
        metafile = workdir + "DATA/" + "META." + trid + ".xml"
        META = DU.getMetadataFromXML(metafile)[trid]
        lat = META['latitude']
        lon = META['longitude']
        trdist = locations2degrees(cmteplat, cmteplon, lat, lon)
        DIST.append(trdist)
    DistIndex = lstargsort(DIST)
    trlist = [trlist[i] for i in DistIndex]
    stdistribution = StDistandAzi(trlist, eq_hyp, workdir + "DATA/")
    StDistributionPlot(stdistribution)
    #exit()
    # Main loop
    for subf in range(nsf):
        print subf
        sflat = grid[subf][1]
        sflon = grid[subf][0]
        sfdepth = grid[subf][2]
        #~ strike = grid[subf][3]   #+ 360.
        #~ dip = grid[subf][4]
        #~ rake = grid[subf][5]
        NP = [strike, dip, rake]
        NPA = [strike, dip, rakeA]
        NPB = [strike, dip, rakeB]
        M = np.array(NodalPlanetoMT(NP))
        MA = np.array(NodalPlanetoMT(NPA))
        MB = np.array(NodalPlanetoMT(NPB))
        # Time delay is calculated as the time in which
        # the rupture reaches the subfault
        sf_hyp = (sflat, sflon, sfdepth)
        Dist_ep_subf = hypo2dist(eq_hyp, sf_hyp)
        if Dist_ep_subf < mindist:
            mindist = Dist_ep_subf
            minsubf = subf
        if RupVel == 0:
            t_d = eqdata['time_shift']
        else:
            t_d = round(Dist_ep_subf/RupVel)   #-59.
        print sflat, sflon, sfdepth
        # Looking for the best depth dir:
        depth = []
        depthdir = []
        for file in os.listdir(GFdir):
            if file[-2:] == ".5":
                depthdir.append(file)
                depth.append(float(file[1:-2]))
        BestDirIndex = np.argsort(abs(sfdepth - np.array(depth)))[0]
        hdir = GFdir + depthdir[BestDirIndex] + "/"
        ###
        SYN = np.array([])
        SYNA = np.array([])
        SYNB = np.array([])
        # Subfault loop
        for trid in trlist:
            metafile = workdir + "DATA/" + "META." + trid + ".xml"
            META = DU.getMetadataFromXML(metafile)[trid]
            lat = META['latitude']
            lon = META['longitude']
            # GFs selection:
            ## Change to following loop
            dist = locations2degrees(sflat, sflon, lat, lon)
            azi = -np.pi/180.*gps2DistAzimuth(lat, lon, sflat, sflon)[2]
            trPPsy, trRRsy, trRTsy, trTTsy = GFSelectZ(hdir, dist)
            trROT = MTrotationZ(azi, trPPsy, trRRsy, trRTsy, trTTsy)
            orig = trROT[0].stats.starttime
            dt = trROT[0].stats.delta
            trianglen = 2.*int(t_h/dt) - 1.
            FirstValid = int(trianglen/2.) + 1   # to delete
            window = triang(trianglen)
            window /= np.sum(window)
            #window = np.array([1.])
            parrivaltime = getTravelTimes(dist, sfdepth)[0]['time']
            t1 = TrimmingTimes[trid][0] - t_d
            t2 = TrimmingTimes[trid][1] - t_d
            for trR in trROT:
                trR.data *= 10.**-21   ## To get M in Nm
                trR.data -= trR.data[0]
                AUX1 = len(trR)
                trR.data = convolve(trR.data, window, mode='valid')
                AUX2 = len(trR)
                mean = np.mean(np.hstack((trR.data[0]*np.ones(FirstValid),
                               trR.data[:60./trR.stats.delta*1. - FirstValid + 1])))
                #mean = np.mean(trR.data[:60])
                trR.data -= mean
                trR.data = bp.bandpassfilter(trR.data, len(trR),
                                             trR.stats.delta,
                                             corners, 1, fmin, fmax)
                t_l = dt*0.5*(AUX1 - AUX2)
                # We lost t_h due to the convolution:
                trR.trim(t1 - t_l, t2 - t_l, pad=True, fill_value=trR.data[0])
            #~ for trR in trROT:
            #~     trR.data *= 10.**-23   ## To get M in Nm
            #~     trR.data -= trR.data[0]
            #~     trR.data = convolve(trR.data, window, mode='same')
            #~     #mean = np.mean(np.hstack((trR.data[0]*np.ones(FirstValid),
            #~     #      trR.data[:60./trR.stats.delta*1.-FirstValid+1])))
            #~     mean = np.mean(trR.data[:60])
            #~     trR.data -= mean
            #~     trR.data = bp.bandpassfilter(trR.data, len(trR),
            #~                                  trR.stats.delta,
            #~                                  corners, 1, fmin, fmax)
            #~     trR.trim(t1, t2, pad=True, fill_value=trR.data[0])
            trROT = np.array(trROT)
            syn = np.dot(trROT.T, M)
            synA = np.dot(trROT.T, MA)
            synB = np.dot(trROT.T, MB)
            SYN = np.append(SYN, syn)
            SYNA = np.append(SYNA, synA)
            SYNB = np.append(SYNB, synB)
        print np.shape(A), np.shape(np.array([SYN]))
        if subf == 0:
            A = np.array([SYN])
            AA = np.array([SYNA])
            AB = np.array([SYNB])
        else:
            A = np.append(A, np.array([SYN]), 0)
            AA = np.append(AA, np.array([SYNA]), 0)
            AB = np.append(AB, np.array([SYNB]), 0)
    AC = np.vstack((AA, AB))
    print np.shape(AC)
    print np.shape(weight)
    B = np.dot(A.T, weight)
    stsyn = Stream()
    n = 0
    Ntraces = {}
    for trid in trlist:
        spid = trid.split(".")
        print trid
        NMIN = 1. + (TrimmingTimes[trid][1] - TrimmingTimes[trid][0])/dt
        Ntraces[trid] = (n, NMIN + n)
        trsyn = Trace(B[n:NMIN + n])
        n += NMIN
        trsyn.stats.network = spid[0]
        trsyn.stats.station = spid[1]
        trsyn.stats.location = spid[2]
        trsyn.stats.channel = spid[3]
        trsyn = AddNoise(trsyn, level=noiselevel)
        #trsyn.stats.starttime =
        stsyn.append(trsyn)
    stsyn.write(workdir + "WPtraces/" + basename + ".decov.trim.mseed",
                format="MSEED")
    #####################################################
    # Plotting:
    #####################################################
    # we are going to reflect the y axis later, so:
    print minsubf
    hypsbloc = [minsubf / nsy, -(minsubf % nsy) - 2]
    # Creating the strike and dip axis:
    StrikeAx = np.linspace(0, flen, nsx+1)
    DipAx = np.linspace(0, fwid, nsy+1)
    DepthAx = DipAx*np.sin(np.pi/180.*dip) + Min_h
    hlstrike = StrikeAx[hypsbloc[0]] + sflen*0.5
    hldip = DipAx[hypsbloc[1]] + sfwid*0.5
    hldepth = DepthAx[hypsbloc[1]] + sfwid*0.5*np.sin(np.pi/180.*dip)
    StrikeAx = StrikeAx - hlstrike
    DipAx = DipAx - hldip
    XX, YY = np.meshgrid(StrikeAx, DepthAx)
    XX, ZZ = np.meshgrid(StrikeAx, DipAx)
    sbarea = sflen*sfwid
    SLIPS = weight.reshape(nsx, nsy).T   #[::-1, :]
    SLIPS /= mu*1.e6*sbarea
    ###### Plot: #####################
    plt.figure()
    ax = host_subplot(111)
    im = ax.pcolor(XX, YY, SLIPS, cmap="jet")
    ax.set_ylabel('Depth [km]')
    ax.set_ylim(DepthAx[-1], DepthAx[0])
    # Creating a twin plot
    ax2 = ax.twinx()
    #im2 = ax2.pcolor(XX, ZZ, SLIPS[::-1, :], cmap="Greys")
    im2 = ax2.pcolor(XX, ZZ, SLIPS[::-1, :], cmap="jet")
    ax2.set_ylabel('Distance along the dip [km]')
    ax2.set_xlabel('Distance along the strike [km]')
    ax2.set_ylim(DipAx[0], DipAx[-1])
    ax2.set_xlim(StrikeAx[0], StrikeAx[-1])
    ax.axis["bottom"].major_ticklabels.set_visible(False)
    ax2.axis["bottom"].major_ticklabels.set_visible(False)
    ax2.axis["top"].set_visible(True)
    ax2.axis["top"].label.set_visible(True)
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)
    cb = plt.colorbar(im, cax=cax, orientation="horizontal")
    cb.set_label("Slip [m]")
    ax2.plot([0], [0], '*', ms=225./(nsy+4))
    ax2.set_xticks(ax2.get_xticks()[1:-1])
    #ax.set_yticks(ax.get_yticks()[1:])
    #ax2.set_yticks(ax2.get_yticks()[:-1])
    ######### Plotting the selected traces:
    nsp = len(PLotSt) * len(PlotSubf)
    plt.figure(figsize=(13, 11))
    plt.title("Synthetics for rake = " + str(round(rake)))
    mindis = []
    maxdis = []
    for i, trid in enumerate(PLotSt):
        x = np.arange(0, Ntraces[trid][1] - Ntraces[trid][0], dt)
        for j, subf in enumerate(PlotSubf):
            y = A[subf, Ntraces[trid][0]:Ntraces[trid][1]]
            if j == 0:
                yy = y
            else:
                yy = np.vstack((yy, y))
        maxdis.append(np.max(yy))
        mindis.append(np.min(yy))
    for i, trid in enumerate(PLotSt):
        x = np.arange(0, Ntraces[trid][1] - Ntraces[trid][0], dt)
        for j, subf in enumerate(PlotSubf):
            y = A[subf, Ntraces[trid][0]:Ntraces[trid][1]]
            plt.subplot2grid((len(PlotSubf), len(PLotSt)), (j, i))
            plt.plot(x, y, linewidth=2.5)
            if j == 0:
                plt.title(trid)
            fig = plt.gca()
            fig.axes.get_yaxis().set_ticks([])
            fig.set_ylabel(str(subf), rotation=0)
            fig.set_xlim((x[0], x[-1]))
            fig.set_ylim((mindis[i], maxdis[i]))
            if subf != PlotSubf[-1]:
                fig.axes.get_xaxis().set_ticks([])
    plt.show()
import DataUtils as util


def checkEncoding(inputText, code, outputText):
    if inputText == outputText:
        print "OK -", inputText
    else:
        print "WRONG - input:", inputText, "- code:", code, \
              "- output:", outputText


# Test encoding of order
array = ["chain-prev", "bfs", "chain", "bfs-prev"]
for order in array:
    code = util.encode_order(order)
    order2 = util.decode_order(code)
    checkEncoding(order, code, order2)

# Test encoding of saturation
array = ["sat-like", "none", "sat-loop"]
for sat in array:
    code = util.encode_sat(sat)
    sat2 = util.decode_sat(code)
    checkEncoding(sat, code, sat2)

# Test encoding of saturation granularity
array = ["40", "1", "20", "2147483647", "80", "20", "5", ""]
for gran in array:
    code = util.encode_gran(gran)
    gran2 = util.decode_gran(code)
    checkEncoding(gran, code, gran2)
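# Hedged sketch (assumption): the encode/decode pairs exercised above are
# not shown in the source. One plausible implementation that satisfies the
# round-trip test is an index lookup into a fixed list; the real DataUtils
# encoding may differ.
#
# _ORDERS = ["chain-prev", "bfs", "chain", "bfs-prev"]
#
# def encode_order(order):
#     return _ORDERS.index(order)
#
# def decode_order(code):
#     return _ORDERS[code]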
import DataUtils

# Row-removal tests
print DataUtils.removeMultipleRows(range(9), [1, 3, 5])
print DataUtils.removeMultipleRows(range(9), [0, 1, 8])
print DataUtils.removeMultipleRows(range(9), [4, 5, 6, 7])

# Column-removal tests
matrix = [range(5), range(5, 10), range(10, 15), range(15, 20)]
print DataUtils.removeMultipleColumns(matrix, [0, 4])
print DataUtils.removeMultipleColumns(matrix, [3])
print DataUtils.removeMultipleColumns(matrix, [1, 2, 3])
print DataUtils.removeMultipleColumns(matrix, [0, 2, 4])
import DataUtils
from sklearn import svm
import Metrics


def inboxLabels(labels):
    for row in range(len(labels)):
        labels[row] = 100*labels[row][0] + 10*labels[row][1] + labels[row][2]
    return labels


missingCount = 4
for setNo in range(3, 6):
    setNo = str(setNo)
    # Import set setNo
    dataSet = DataUtils.read_dataset(directory + "Set-" + setNo + "-train.csv")
    testSet = DataUtils.read_dataset(directory + "Set-" + setNo +
                                     "-validate.csv")
    names = dataSet.names
    features = dataSet.features
    # Inbox labels
    labels = inboxLabels(dataSet.labels)
    testLabels = inboxLabels(testSet.labels)
    missingCombinations = DataUtils.nPrCombinations(range(len(names)),
                                                    missingCount)
    print "SET", setNo
    for missingColumns in missingCombinations:
        # Print omitted features
        #for col in missingColumns:
# Prepare training data
directory = "/home/richard/Project/masterproject_benchmark/R_source/Models/Time/"
setNo = "2"

import DataUtils

dataSet = DataUtils.read_dataset(directory + "Set-" + setNo + "-train.csv")
features = dataSet.features
labels = dataSet.labels

# Select classifier
from sklearn import tree
from sklearn import neighbors

#classifier = tree.DecisionTreeClassifier()
classifier = neighbors.KNeighborsClassifier(n_neighbors=6, weights="uniform")
#classifier = neighbors.RadiusNeighborsClassifier(radius=100000000.0)

# Train classifier
classifier = classifier.fit(features, labels)

# Test classifier
testSet = DataUtils.read_dataset(directory + "Set-" + setNo + "-validate.csv")
predictions = classifier.predict(testSet.features)

# Write results to csv file
results = DataUtils.ResultSet(testSet.id, predictions, testSet.labels)
DataUtils.write_resultset(results, directory + "Result-" + setNo + ".csv")
dim = len(predictions[0])

# Determine metrics based on confusion matrix
def getPriceVectorDescriptor():
    return DUtils.getPVD()


def getPriceVector(cardNumber, useRelevant):
    return DUtils.parseIntoPriceOnlyList(cardNumber, useRelevant)