Example #1
def login(crversion, url, login, password, sessionname):

    IsVersionSupported, CallType, ApiUri, Headers, Body = get_login_resources(
        crversion, login, password, sessionname)

    if not IsVersionSupported:
        logging.debug("Unsupported CR Version: {}".format(crversion))
        print("Unsupported CR Version")
        exit(1)

    FULLURL = urllib.parse.urljoin(url, ApiUri)

    response = requests.request(CallType, FULLURL, data=Body, headers=Headers)

    isAPICallOK = StdResponses.processAPIResponse(response)
    if (not isAPICallOK):
        exit(99)
    else:
        #print("DEBUG"+str(response.text))
        isError, Code = AuthResponses.Process_Auth_Login_Response(response)
        if (not isError):
            DataUtils.StoreAuthToken(Code, sessionname)
            DataUtils.StoreUrl(url, sessionname)
            DataUtils.StoreCRVersion(crversion, sessionname)
            print("Token Stored in session: " + sessionname)
Example #2
def generic_api_call_handler(outputFormat,sessionname,get_res_func,res_data,df_transform_func):
        url = DataUtils.GetUrl(sessionname)
        TOKEN = DataUtils.GetAuthToken(sessionname)
        CRVERSION = DataUtils.GetCRVersion(sessionname)

        IsVersionSupported,CallType,ApiUri,Headers,Body = get_res_func(CRVERSION,sessionname,TOKEN,res_data)

        if not IsVersionSupported:
            logging.debug("Unsupported CR Version: {}".format(CRVERSION))
            print("Unsupported CR Version")
            exit(1)

        FULLURL = urllib.parse.urljoin(url,ApiUri)

        response = requests.request(method=CallType, url=FULLURL, data=Body, headers=Headers)

        isAPICallOK = StdResponses.processAPIResponse(response)
        if(not isAPICallOK):
            exit(99)
        else:
            json_object = json.loads(response.text)
            if (outputFormat == "DF"):
                #print(json_object)
                aDF = df_transform_func(json_object)
                print(aDF)
            elif (outputFormat == "CSV"):
                #print(json_object)
                aDF = df_transform_func(json_object)
                print(aDF.to_csv(index=False))
            else:
                #print(json_object)
                json_formatted_str = json.dumps(json_object, indent=2)
                print(json_formatted_str)
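
For context, a minimal usage sketch of the wrapper above, assuming a session named "default" was already created by login(); the resource-builder function, endpoint path, headers, and JSON field names below are placeholders, not a documented Control Room API.

import pandas as pd

# Hypothetical resource builder matching the (IsVersionSupported, CallType, ApiUri, Headers, Body) contract.
def get_example_list_resources(crversion, sessionname, token, res_data):
    headers = {"Content-Type": "application/json", "X-Authorization": token}
    return True, "GET", "/v1/example/list", headers, None

# Hypothetical transform turning the JSON payload into a DataFrame.
def example_list_to_df(json_object):
    return pd.DataFrame(json_object.get("list", []))

generic_api_call_handler("CSV", "default", get_example_list_resources, None, example_list_to_df)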
Example #3
def build_abstract_tuples():
    abstracts_directory = Config.extracted_abstracts_dir
    revision_ids_directory = Config.extracted_revision_ids_dir
    abstracts_filenames = sorted(os.listdir(abstracts_directory))
    revision_ids_filenames = sorted(os.listdir(revision_ids_directory))
    for abstract_filename, revision_ids_filename in zip(
            abstracts_filenames, revision_ids_filenames):
        tuples = list()
        abstracts = DataUtils.load_json(abstracts_directory, abstract_filename)
        revision_ids = DataUtils.load_json(revision_ids_directory,
                                           revision_ids_filename)
        for page_name in abstracts:
            json_dict = dict()
            json_dict['template_name'] = None
            json_dict['template_type'] = None
            json_dict[
                'subject'] = 'http://fa.wikipedia.org/wiki/' + page_name.replace(
                    ' ', '_')
            json_dict['predicate'] = 'abstract'
            json_dict['object'] = abstracts[page_name]
            json_dict[
                'source'] = 'http://fa.wikipedia.org/wiki/' + page_name.replace(
                    ' ', '_')
            json_dict['version'] = revision_ids[page_name]
            tuples.append(json_dict)
        DataUtils.save_json(Config.final_abstract_tuples_dir,
                            abstract_filename, tuples)
Example #4
	def callbackMain(self):
		app = App.get_running_app()
		data_dir = app.user_dir + "/datasets"
		dataUtils = DataUtils(data_dir)
		dataUtils.mergeFiles()	# creates datasets for NN training
		self.ids["up"].clear_widgets()
		sm.current = "Main"
Example #5
def extract_image_names_from_sql_dump():
    image_names_types = dict()

    # https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
    max_int = sys.maxsize

    while True:
        # decrease the maxInt value by factor 10
        # as long as the OverflowError occurs.
        try:
            csv.field_size_limit(max_int)
            break
        except OverflowError:
            max_int = int(max_int / 10)

    all_records = SqlUtils.get_sql_rows(Config.fawiki_latest_images_dump,
                                        quotechar='"')
    for record in all_records:
        for columns in record:
            image_name, image_type = columns[0].strip("'"), columns[8].strip(
                "'")
            image_names_types[image_name] = image_type

    DataUtils.save_json(Config.extracted_image_names_types_dir,
                        Config.extracted_image_names_types_filename,
                        image_names_types)
Example #6
def get_learning_instance_detail(learningInstanceName="",
                                 learningInstanceID="",
                                 sessionname="",
                                 CsvOutput=False,
                                 ProcessOutput=True):

    if (learningInstanceName != ""):
        learningInstanceID = IQBotCommons.ConvertLINameToLIID(
            sessionname, learningInstanceName)

    URL = urllib.parse.urljoin(DataUtils.GetUrl(sessionname),
                               get_LI_DETAIL_URI(learningInstanceID))

    headers = {
        'Content-Type': "application/json",
        'cache-control': "no-cache",
        'X-Authorization': DataUtils.GetAuthToken(sessionname)
    }

    response = requests.request(LI_DETAIL_REQ_TYPE, URL, headers=headers)
    if (ProcessOutput):
        isInError = IQBotLIResponses.Process_LI_Detail_Response(
            response, CsvOutput)
    else:
        return response
Example #7
def save_sql_dump(directory, filename, sql_dump, encoding='utf8'):
    if len(directory) < 255:
        DataUtils.create_directory(directory)
        sql_filename = join(directory, filename)
        sql_file = open(sql_filename, 'w+', encoding=encoding)
        sql_file.write(sql_dump)
        sql_file.close()
Example #8
def build_category_tuples():
    category_directory = Config.extracted_category_links_dir
    category_filename = Config.extracted_category_links_filename
    categories = DataUtils.load_json(category_directory, category_filename)
    revision_ids_directory = Config.extracted_revision_ids_dir
    revision_ids_filenames = sorted(os.listdir(revision_ids_directory))
    for revision_ids_filename in revision_ids_filenames:
        revision_ids = DataUtils.load_json(revision_ids_directory,
                                           revision_ids_filename)

        tuples = list()
        for page_name in revision_ids:
            page_name = page_name.replace(' ', '_')
            if page_name in categories:
                page_categories = categories[page_name]
                for page_category in page_categories:
                    json_dict = dict()
                    json_dict['template_name'] = None
                    json_dict['template_type'] = None
                    json_dict[
                        'subject'] = 'http://fa.wikipedia.org/wiki/' + page_name.replace(
                            ' ', '_')
                    json_dict['predicate'] = 'wikiCategory'
                    json_dict['object'] = page_category.replace('_', ' ')
                    json_dict[
                        'source'] = 'http://fa.wikipedia.org/wiki/' + page_name.replace(
                            ' ', '_')
                    json_dict['version'] = revision_ids[page_name.replace(
                        '_', ' ')]
                    tuples.append(json_dict)
        DataUtils.save_json(Config.final_category_tuples_dir,
                            revision_ids_filename, tuples)
Example #9
def reorganize_infoboxes():
    reorganized_infoboxes = dict()
    directory = Config.extracted_with_infobox_dir
    filenames = DataUtils.get_infoboxes_filenames(directory)
    for filename in filenames:
        infoboxes = DataUtils.load_json(directory, filename)
        for infobox_name in infoboxes:
            template_name, infobox_type = DataUtils.get_infobox_name_type(
                infobox_name)
            if infobox_type not in reorganized_infoboxes:
                reorganized_infoboxes[infobox_type] = dict()
            if infobox_name not in reorganized_infoboxes[infobox_type]:
                reorganized_infoboxes[infobox_type][infobox_name] = dict()

            for page_name in infoboxes[infobox_name]:
                reorganized_infoboxes[infobox_type][infobox_name][
                    page_name] = infoboxes[infobox_name][page_name]

    for infobox_type in reorganized_infoboxes:
        for infobox_name in reorganized_infoboxes[infobox_type]:
            infobox_name_type_path = join(Config.reorganized_infoboxes_dir,
                                          infobox_type, infobox_name)
            DataUtils.save_json(
                infobox_name_type_path, 'infoboxes',
                reorganized_infoboxes[infobox_type][infobox_name])
Example #10
def listTorrents(outputFormat, sessionname):
    OutputFormat = 0
    url = DataUtils.GetUrl(sessionname)
    SID = DataUtils.GetAuthToken(sessionname)
    DSMVERSION = DataUtils.GetDSMVersion(sessionname)

    IsVersionSupported, URLParams, headers = get_torrent_list_url(
        DSMVERSION, SID)

    if not IsVersionSupported:
        logging.debug("Unsupported DSM Version: {}".format(DSMVERSION))
        print("Unsupported DSM Version")
        exit(1)

    FULLURL = urllib.parse.urljoin(url, URLParams)

    response = requests.request("GET", FULLURL, data=None, headers=headers)
    isAPICallOK = StdResponses.processAPIResponse(response)
    if (not isAPICallOK):
        exit(99)
    else:
        json_object = json.loads(response.text)
        if (outputFormat == "DF"):
            #print(json_object)
            aDF = DSMTransformers.GetListAsCsv(json_object)
            print(aDF)
        elif (outputFormat == "CSV"):
            #print(json_object)
            aDF = DSMTransformers.GetListAsCsv(json_object)
            print(aDF.to_csv(index=False))
        else:
            #print(json_object)
            json_formatted_str = json.dumps(json_object, indent=2)
            print(json_formatted_str)
Example #11
def pre_deal_data(alpha=1, beta=1):
    """
    :param alpha: 运行时间权重
    :param beta: 代码行数权重
    """
    for case_id in raw_case_map.keys():
        timeList = []
        lineList = []
        for raw_case in raw_case_map[case_id]:
            # print(raw_case)
            timeList.append(raw_case.time)
            lineList.append(raw_case.line)
        # print(timeList)
        timeAVG = np.average(timeList)
        timeVAR = np.var(timeList)
        lineAVG = np.average(lineList)
        lineVAR = np.var(lineList)
        for raw_case in raw_case_map[case_id]:
            temp = raw_case.copy()

            time = DataUtils.omega(raw_case.time, timeAVG, timeVAR)
            line = DataUtils.omega(raw_case.line, lineAVG, lineVAR)
            temp.score = temp.score * time**alpha * line**beta
            student_case_map[temp.user_id][temp.case_id] = temp
            case_student_map[temp.case_id][temp.user_id] = temp
Example #12
def Process_Auth_Login_Response(res, url, sessionname):

    if (res.status_code >= 400):
        print("Error Code: " + str(res.status_code))
        try:
            result = json.loads(res.text)
            if result['message']:
                print("Error Message: " + result['message'])
                return True
        except:
            return True

        return True

    else:
        try:
            result = json.loads(res.text)
            #print(result['token'])
            token = result['token']
            #print("Token:["+token+"]")
            print("Token Stored in session: " + sessionname)
            DataUtils.StoreAuthToken(token, sessionname)
            DataUtils.StoreUrl(url, sessionname)
            return False
        except:
            print("Unknown Error.")
            return True
Example #13
def run(data, function, error, alpha, epsilion=1e-9, reg_type="L2GD", lamdas=[], degree=0):
    global f
    f = function
    train, validate = DataUtils.data_split(data, split_at=0.8)

    x_train, y_train = DataUtils.xy_split(train)
    x_val, y_val     = DataUtils.xy_split(validate)

    print(f"Starting Regularized Gradient Descent\n with alpha={alpha}, epsilion={epsilion}\n")
    x_train.insert(0, "Const", np.ones(x_train.shape[0]))
    x_val.insert(0, "Const", np.ones(x_val.shape[0]))
    x_train = np.array(x_train)
    y_train = np.array(y_train)

    if len(lamdas) == 0:
        # lamdas = np.arange(0,1,0.1) # values b/w 0 and 1 stepped by 0.1
        # lamdas = [i for i in range(-1200, 1200, 200)]
        lamdas = np.linspace(-15000, 3000, 30)

    val_err = list()
    train_err = list()
    w_list = list()
    for lamda in tqdm(lamdas):
        # print(f"\n>>>Lamda: {lamda}")
        w = grad_desc(x_train, y_train, error, alpha, epsilion, lamda, reg_type)
        w_list.append(w)
        val_err.append( test(w, x_val, y_val) )
        train_err.append( error(w, x_train, y_train) )
    
    lamdas = np.array(lamdas)/data.shape[0]

    return w_list, lamdas, val_err, train_err
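
grad_desc and test are used by run() but are not shown here. Purely as an assumption about their contract, the sketch below shows what an L2-regularized batch gradient-descent step for a linear model could look like, assuming x is a 2-D feature matrix, y is a 1-D target vector, and error(w, x, y) returns a scalar loss.

# Hypothetical sketch of grad_desc; not the original implementation.
import numpy as np

def grad_desc(x, y, error, alpha, epsilon, lamda, reg_type="L2GD", max_iter=10000):
    w = np.zeros(x.shape[1])
    prev_loss = np.inf
    for _ in range(max_iter):
        residual = x @ w - y                 # linear-model prediction error
        grad = x.T @ residual / len(y)       # gradient of the mean squared error
        if reg_type == "L2GD":
            grad += lamda * w                # L2 (ridge) penalty term
        w -= alpha * grad
        loss = error(w, x, y)
        if abs(prev_loss - loss) < epsilon:  # stop once the loss stabilizes
            break
        prev_loss = loss
    return w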
Example #14
def change_group(groupnum,liname,operation,sessionname,CsvOutput,ProcessOutput = True):

    liid,groupMappings = IQBotCommons.GetAllGroupsFromLI(sessionname,liname)

    AllGroups = []
    if("," in groupnum):
        AllGroups = groupnum.split(",")
    elif(groupnum.upper() in ["ALL","ALLGROUPS","ALLGRPS","EVERYTHING"]):
        AllGroups = list(groupMappings.keys())
    else:
        AllGroups.append(groupnum)


    for Grp in AllGroups:
        GrpId = groupMappings[Grp]
        URL = urllib.parse.urljoin(DataUtils.GetUrl(sessionname), get_LI_GROUP_CHANGE_STATE_URI(liid,GrpId,Grp))
        payload = GET_GROUP_CHANGE_STATE_BODY(operation)
        headers = {
            'Content-Type': "application/json",
            'cache-control': "no-cache",
            'X-Authorization': DataUtils.GetAuthToken(sessionname)
        }

        response = requests.request(LI_GROUP_CHANGE_STATE_REQ_TYPE, URL,data=payload, headers=headers)
        #print(response.text)
        if(ProcessOutput):
            isInError = IQBotGroupResponses.Process_Grp_State_Change_Response(response,Grp,liname,CsvOutput)

        else:
            return response
Example #15
def try_params(model_gen, params, data,
               output_dir, base_fname, model_name, OBJECT,
               regression=False, nb_epoch=2, validation_data=(None, None)):
    def metrics_names(metrics):
        return sorted(metrics.keys())
    def metrics_to_list(metrics):
        return map(lambda key: metrics[key], metrics_names(metrics))

    summary_csv_fname = os.path.join(
            output_dir, base_fname + '_' + model_name + '_summary.csv')

    X_train, Y_train, X_test, Y_test = data
    nb_classes = params[1]
    to_write = []
    for param in params:
        param_base_fname = base_fname + '_' + model_name + '_' + '_'.join(map(str, param[2:]))
        model_fname = os.path.join(
                output_dir, param_base_fname + '.h5')
        csv_fname = os.path.join(
                output_dir, param_base_fname + '.csv')

        # Make, train, and evaluate the model
        model = model_gen(*param, regression=regression)
        if regression:
            train_time = run_model(model, data, nb_epoch=nb_epoch,
                    validation_data=validation_data)
            metrics = evaluate_model_regression(model, X_test, Y_test)
        else:
            if nb_classes == 2:
                train_time, metrics = learn_and_eval(model, data,
                        validation_data=validation_data)
            else:
                train_time = run_model(model, data, nb_epoch=nb_epoch,
                        validation_data=validation_data)
                metrics = evaluate_model_multiclass(model, X_test, Y_test)

        # Output predictions and save the model
        # Redo some computation to save my sanity
        conf1 = model.predict(X_train, batch_size=256, verbose=0)
        conf2 = model.predict(X_test,  batch_size=256, verbose=0)
        conf = np.concatenate([conf1, conf2])
        if len(conf.shape) > 1:
            assert len(conf.shape) == 2
            assert conf.shape[1] <= 2
            if conf.shape[1] == 2:
                conf = conf[:, 1]
            else:
                conf = np.ravel(conf)
        DataUtils.confidences_to_csv(csv_fname, conf, OBJECT)
        model.save(model_fname)

        to_write.append(list(param[2:]) + [train_time] + metrics_to_list(metrics))
        print param
        print train_time, metrics
        print
    print to_write
    # First two params don't need to be written out
    param_column_names = map(lambda i: 'param' + str(i), xrange(len(params[0]) - 2))
    column_names = param_column_names + ['train_time'] + metrics_names(metrics)
    DataUtils.output_csv(summary_csv_fname, to_write, column_names)
Example #16
def generic_api_call_handler_no_post(outputFormat,sessionname,get_res_func,res_data):
        url = DataUtils.GetUrl(sessionname)
        TOKEN = DataUtils.GetAuthToken(sessionname)
        CRVERSION = DataUtils.GetCRVersion(sessionname)

        IsVersionSupported,CallType,ApiUri,Headers,Body = get_res_func(CRVERSION,sessionname,TOKEN,res_data)

        if not IsVersionSupported:
            logging.debug("Unsupported CR Version: {}".format(CRVERSION))
            print("Unsupported CR Version")
            exit(1)

        FULLURL = urllib.parse.urljoin(url,ApiUri)

        response = requests.request(method=CallType, url=FULLURL, data=Body, headers=Headers)

        isAPICallOK = StdResponses.processAPIResponse(response)
        if(not isAPICallOK):
            exit(99)
        else:
            if(response.text != ""):
                json_object = json.loads(response.text)
                return json_object
            else:
                return response.status_code
Example #17
def not_map_farsnet_kg_ontology():
    input_ontology_filename = DataUtils.join(Config.farsnet_ontology,
                                             Config.farsnet_ontology_filename)
    input_farsnet_map_ontology_filename = DataUtils.join(
        Config.farsnet_ontology, Config.farsnet_map_ontology_filename)
    output_farsnet_not_map_ontology_filename = DataUtils.join(
        Config.farsnet_ontology, Config.farsnet_not_map_ontology_filename)

    normalizer = hazm.Normalizer()
    flag_find = False
    item = 'word'
    with open(input_ontology_filename, 'r') as input_file_ontology, \
            open(output_farsnet_not_map_ontology_filename, 'a') as output_file:
        csv_reader_ontology, csv_writer = csv.reader(
            input_file_ontology), csv.writer(output_file)
        for line_ontology in csv_reader_ontology:
            if not flag_find:
                csv_writer.writerow([item])
                print(item)
            item = normalizer.normalize(line_ontology[0])
            flag_find = False
            with open(input_farsnet_map_ontology_filename,
                      'r') as input_file_map:
                csv_reader_graph = csv.reader(input_file_map)

                for line_map in csv_reader_graph:
                    if item == normalizer.normalize(line_map[1]):
                        flag_find = True
                        break
Example #18
def find_farsnet_disambiguate_page():
    input_ambiguate_abstract_filename = join(
        Config.article_names_dir, Config.farsnet_ambiguate_abstract_filename)
    disambiguate_filename = os.listdir(Config.extracted_disambiguations_dir)
    abstract_filename = os.listdir(Config.extracted_texts_dir)
    output_disambiguate_wiki = join(Config.article_names_dir,
                                    Config.farsnet_disambiguate_wiki_filename)

    max_number = 0
    min_number = 1000
    with open(output_disambiguate_wiki,
              'w') as output_file, open(input_ambiguate_abstract_filename,
                                        'r') as input_file:
        csv_writer, csv_reader = csv.writer(output_file), csv.reader(
            input_file)

        for line in csv_reader:
            for disambiguate_file in disambiguate_filename:
                list_disambiguate = DataUtils.load_json(
                    Config.extracted_disambiguations_dir, disambiguate_file)
                # for item in list_disambiguate:
                #     if line[1] ==item:
                for item_disambiguate in list_disambiguate:
                    if line[1] == item_disambiguate['title']:
                        print(line[1] + ' find in disambiguate page.')

                        for abstract_file in abstract_filename:
                            list_abstract = DataUtils.load_json(
                                Config.extracted_texts_dir, abstract_file)
                            for abstract_key in list_abstract:

                                if any(abstract_key == d
                                       for d in item_disambiguate['field']):

                                    print('find abstract_key: ' + abstract_key)
                                    sentence_snapshot = str(line[3]).replace(
                                        ',', ' ').replace('،', ' ') + ' '
                                    gloss_sentence = str(line[4]).replace(
                                        ',', ' ').replace('،', ' ') + ' '
                                    example = gloss = str(line[5]).replace(
                                        ',', ' ').replace('،', ' ') + ' '
                                    sentence1 = sentence_snapshot + gloss_sentence + example
                                    sentence2 = str(
                                        list_abstract[abstract_key]).replace(
                                            ',', ' ').replace('،',
                                                              ' ').replace(
                                                                  '.', ' ')

                                    diff = similar(sentence1, sentence2)
                                    if diff > max_number:
                                        max_number = diff
                                    if diff < min_number:
                                        min_number = diff
                                    csv_writer.writerow([
                                        line[0], line[1], line[2], line[3],
                                        line[4], line[5], abstract_key,
                                        list_abstract[abstract_key], diff
                                    ])
Example #19
def realization_elm(elm, d, att, xor=False):
    np.random.shuffle(d)
    qt_training = int(0.8 * len(d))
    train_data, test_data = d[:qt_training], d[qt_training:]
    elm.train(train_data, att)
    accuracy = elm.test(test_data, att)

    if (xor): ut.plot_decision_surface_elm(elm, test_data, att)

    return accuracy
Example #20
def revert_previous_etags():
    try:
        previous_etags = DataUtils.load_json(
            Config.update_dir, Config.previous_wiki_rss_etags_filename)
        DataUtils.save_json(Config.update_dir, Config.wiki_rss_etags_filename,
                            previous_etags)
    except FileNotFoundError:
        DataUtils.save_json(Config.update_dir, Config.wiki_rss_etags_filename,
                            {dump_name: ''
                             for dump_name in DUMP_NAMES})
Example #21
def extract_bz2_dump(lang):
    input_filename = Config.latest_pages_articles_dump[lang]
    output_dir = Config.extracted_pages_articles_dir[lang]
    DataUtils.create_directory(output_dir, show_logging=True)
    if not os.listdir(output_dir):
        pages_counter = 0
        extracted_pages_filename, extracted_pages_file = DataUtils.open_extracted_bz2_dump_file(
            pages_counter, output_dir, lang)

        for page in DataUtils.get_wikipedia_pages(input_filename):
            extracted_pages_file.write(page)
            pages_counter += 1
            if pages_counter % Config.extracted_pages_per_file[lang] == 0:
                LogUtils.logging_pages_extraction(pages_counter,
                                                  extracted_pages_filename)
                DataUtils.close_extracted_bz2_dump_file(
                    extracted_pages_filename, extracted_pages_file)
                extracted_pages_filename, extracted_pages_file = \
                    DataUtils.open_extracted_bz2_dump_file(pages_counter, output_dir, lang)

        LogUtils.logging_pages_extraction(pages_counter,
                                          extracted_pages_filename)
        DataUtils.close_extracted_bz2_dump_file(extracted_pages_filename,
                                                extracted_pages_file)
        logging.info(
            'Page Extraction Finished! Number of All Extracted Pages: %d' %
            pages_counter)
Example #22
def run(model_name="squeezenet_by_pass"):
    # input image dimensions - from data utils
    img_rows, img_cols = d_utils.IMAGE_SIZE, d_utils.IMAGE_SIZE
    num_classes = d_utils.NUM_CLASSES
    channels = d_utils.NUM_CHANNELS

    # Read and prepare data
    x_train, y_train, training_le = d_utils.load_training_images()

    x_test, y_test = d_utils.load_test_images(training_le)

    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, channels)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, channels)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    input_shape = (img_rows, img_cols, channels)

    # Tensorboard callback
    tbCallBack = keras.callbacks.TensorBoard(log_dir='./tensorboard/' +
                                             model_name,
                                             histogram_freq=0,
                                             write_graph=True,
                                             write_images=False)

    # Build the model
    model = squeeze_net_by_pass(num_classes, input_shape)

    # Compile the model
    model.compile(loss=params['loss'],
                  optimizer=keras.optimizers.SGD(lr=params['base_lr'],
                                                 decay=params['decay_lr'],
                                                 momentum=params['momentum']),
                  metrics=['accuracy'])

    # Train the model
    model.fit(x_train,
              y_train,
              batch_size=params['batch_size'],
              epochs=params['epochs'],
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=[tbCallBack])

    # Print Results
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Results for ' + model_name + ':')
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
Example #23
def bankData():
    bankData = []
    for l_type in positions:
        l_data = DataUtils.get_frame_by_coord(
            "hello bank", pdf_link,
            DataUtils.compute_coord_from_object(positions, l_type),
            1).values.tolist()
        for line in l_data:
            bankData.append(
                ["HELLO BANK", "HOME LOAN", l_type, line[0], line[1]])
    return bankData
Example #24
def table_extraction_bye_page_title(title):
    wiki_text, revision_id = get_wikitext_by_api(title)
    if wiki_text:
        tuples = list()
        wiki_text = wp.parse(wiki_text)
        for section in wiki_text.sections:
            for table in section.tables:
                new_tuples = build_tuples(table, title, section.title,
                                          revision_id)[0]
                tuples.extend(new_tuples)

        DataUtils.save_json(Config.wiki_table_tuples_dir, title, tuples)
Example #25
def extract_page_ids_from_sql_dump():
    all_records = SqlUtils.get_sql_rows(Config.fawiki_latest_page_dump)

    page_ids = dict()
    for record in all_records:
        for columns in record:
            page_id, page_namespace, page_title = columns[0], columns[
                1], columns[2]
            page_ids[page_id] = page_title

    DataUtils.save_json(Config.extracted_page_ids_dir,
                        Config.extracted_page_ids_filename, page_ids)
Example #26
def copy_result(new_version_dir):
    successful_copy = True
    destination_address = join(DESTINATION_DIR, new_version_dir)
    DataUtils.create_directory(destination_address)

    for directory in RESULT_DIRECTORIES:
        if not DataUtils.copy_directory(directory, destination_address):
            successful_copy = False
    if successful_copy:
        logging.info('Result directories successfully copied.')
    DataUtils.create_symlink(destination_address, join(DESTINATION_DIR,
                                                       'last'))
    return successful_copy
Example #27
def main():
    # Configs
    config = configs.config()

    # Reading the selected data
    DataCSVFrame = pd.read_csv("DataFrame.csv",
                               usecols=["Image_Index", "Finding_Labels"],
                               index_col=False)
    labelsSet = set(DataCSVFrame["Finding_Labels"].values)

    # Dictionary with the label as key and the index in the set as value
    labelsDict = {}
    # Dictionary that is the reverse of the one above, to change back from value to the corresponding label
    labelDictClassify = {}

    # Filling the dictionaries
    for index, label in enumerate(labelsSet):
        labelsDict[label] = index
        labelDictClassify[index] = label

    # Path where all the images are stored
    imgPath = config.getImagePath()
    # Creating the dataset
    xrayDataset = DC.XRayDataset(DataCSVFrame, imgPath, labelsDict)

    #    # Getting the first image from the dataset
    #    imgs, labs = xrayDataset.__getitem__([8307])
    #    print(len(imgs))
    #    print(imgs)

    # Get the device (cpu/gpu) to run the model on
    device = DU.getDevice()

    # Gets the ranges of training and test data
    training, testing = DU.splitTrainTest(xrayDataset, config)

    # Get the train and validation sets
    trainSets, valSets = DU.trainValSets(training, config)

    # Initialize the criterion, optimizer and model
    criterion, optimizer, model = NM.modelInit(device)

    # Get the batchsize
    batchsize = config.getBatchSize()

    # Train the model
    trainedModel = NM.trainNetwork(device, xrayDataset, trainSets, valSets,
                                   config, model, criterion, optimizer,
                                   batchsize)

    # Save the model to be used for testing
    NM.save_model(trainedModel, config.getModelName())
Example #28
def list_groups_from_li(liid, sessionname):
    # /IQBot/api/projects/b069d79d-5df0-43dc-824f-2c44474867ca/categories?offset=0&limit=50&sort=-index&trainingNotRequired=true
    URL = urllib.parse.urljoin(DataUtils.GetUrl(sessionname),
                               get_LI_LIST_GROUPS_FROM_LI_URI(liid))

    headers = {
        'Content-Type': "application/json",
        'cache-control': "no-cache",
        'X-Authorization': DataUtils.GetAuthToken(sessionname)
    }

    response = requests.request(LI_LIST_GROUPS_FROM_LI, URL, headers=headers)
    return response
Example #29
def extract_wiki_links_from_sql_dump(page_ids, output_directory,
                                     output_filename):
    wiki_links = defaultdict(list)

    all_records = SqlUtils.get_sql_rows(Config.fawiki_latest_page_links_dump)
    for record in all_records:
        for columns in record:
            pl_from, pl_title = columns[0], columns[2]
            if pl_from in page_ids:
                pl_from = page_ids[pl_from]
                wiki_links[pl_from].append(pl_title)

    DataUtils.save_json(output_directory, output_filename, wiki_links)
Example #30
def change_group_status(outputFormat, sessionname, LiID, GroupNum, NewStatus):

    url = DataUtils.GetUrl(sessionname)
    TOKEN = DataUtils.GetAuthToken(sessionname)
    CRVERSION = DataUtils.GetCRVersion(sessionname)

    liid, groupMappings = IQBotCommons.GetAllGroupsFromLI(sessionname, LiID)

    AllGroups = []
    if ("," in GroupNum):
        AllGroups = GroupNum.split(",")
    elif (GroupNum.upper() in ["ALL", "ALLGROUPS", "ALLGRPS", "EVERYTHING"]):
        AllGroups = list(groupMappings.keys())
    else:
        AllGroups.append(GroupNum)

    AllRows = []
    for Grp in AllGroups:
        GrpId = groupMappings[Grp]
        JsonData = {
            "GrpId": GrpId,
            "GroupName": Grp,
            "LiId": LiID,
            "NewStatus": NewStatus
        }
        #IsVersionSupported,CallType,ApiUri,Headers,Body = get_group_update_resources(CRVERSION,sessionname,TOKEN,LiID,GrpId,Grp,NewStatus)
        json_object = StdAPIUtils.generic_api_call_handler_no_post(
            outputFormat, sessionname, get_group_update_resources, JsonData)

        Success = False
        CurentStatus = 'no change'
        GroupNumber = Grp
        if ('success' in json_object):
            Success = json_object['success']
        if ('data' in json_object):
            CurentStatus = json_object['data']
        aRow = {
            'GroupNumber': GroupNumber,
            'CurrentState': CurentStatus,
            'GroupID': GrpId,
            'UpdateSuccess': Success
        }
        AllRows.append(aRow)

    FinalDF = pd.DataFrame(AllRows)
    if (outputFormat == "DF"):
        print(FinalDF)
    elif (outputFormat == "CSV"):
        print(FinalDF.to_csv(index=False))
    else:
        print(FinalDF.to_json())
Example #31
def StDistandAzi(traces, hyp, dir):
    '''
    Given a list of station ids, a tuple with
    (lat, lon, depth) of the hypocentre and
    the directory with the XML metafiles, it returns a
    dictionary with the distance and the azimuth of
    each station.
    '''
    Stdistribution = {}
    for trid in traces:     
            
        metafile = dir + "META." + trid + ".xml"
        META = DU.getMetadataFromXML(metafile)[trid]
        lat = META['latitude']
        lon = META['longitude']            
        dist = locations2degrees(hyp[0],hyp[1],lat,lon)                                
        azi =  -np.pi/180.*gps2DistAzimuth(lat,lon,
                   hyp[0],hyp[1])[2]
        Stdistribution[trid] = {'azi' : azi, 'dist' : dist} 

    
    #~ fig = plt.figure()  
    #~ plt.rc('grid', color='#316931', linewidth=1, linestyle='-')
    #~ plt.show()
    
    return Stdistribution
Example #32
def copyFileIfRelevant(cardNumber):
	#get the prices
	priceList = DataUtils.parseIntoPriceOnlyList(cardNumber, False)
	if priceList is None:
		return 0
		
	#if it's greater than 1 dollar
	curPrice = priceList[len(priceList)-1]
	if curPrice >= 100:
		shutil.copyfile(getFileNameFromCardNumber(cardNumber, True), getFileNameFromCardNumber(cardNumber, False))
		return 1
	else: return 0
Example #33
def getFileNameFromCardNumber(c, src):
	cardNumber = str(c)
	#missing 0's:
	mm0s = DataUtils.missing0s(cardNumber)
	
	#open the file, get the lines
	filename = ''
	filename += 'DatabaseDownloadTools/'
	if src:
		filename += 'cardPriceData'
	else:
		filename += 'RelevantCardPriceData'
	filename += '/CardData'+ mm0s + cardNumber+".txt"
	return filename
Example #34
def getFileNameFromCardNumber(c, useRelevant):
    cardNumber = str(c)
    # missing 0's:
    mm0s = DataUtils.missing0s(cardNumber)

    # open the file, get the lines
    filename = ""
    filename += "DatabaseDownloadTools/"
    if useRelevant:
        filename += "RelevantCardPriceData"
    else:
        filename += "cardPriceData"

    filename += "/CardData" + mm0s + cardNumber + ".txt"
    return filename
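
DataUtils.missing0s is referenced in Examples #32-#34 but not shown; presumably it returns the zero padding needed to bring the card number up to a fixed width. A sketch under that assumption (the 5-digit width is a guess):

# Hypothetical sketch of DataUtils.missing0s; the target width is an assumption.
def missing0s(card_number, width=5):
    return "0" * max(0, width - len(str(card_number)))

# e.g. missing0s(42) -> "000", giving a filename like "CardData00042.txt"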
Example #35
def main(argv=sys.argv): 

    global eplat, eplon, epdepth, orig
    
    
    GFdir = "/home/roberto/data/GFS/"
    beta = 4.e3 #m/s 
    rho = 3.e3 #kg/m^3 
    mu = rho*beta*beta
    mu =40e9
    
    Lbdm0min = 1e-26*np.array([125.])
    Lbdsmooth = 1e-26*np.array([100.])
    
    #~ Lbdm0min = 1e-26*np.linspace(60.,500,40)
    #~ Lbdsmooth = 1e-26*np.linspace(60.,500,40)#*0.5

    corners = 4.
    fmin = 0.001
    fmax = 0.005
    
    ### Data from Chilean 2010 EQ (Same as W phase inv.) 
    strike = 18.
    dip    = 18.
    rake   = 104. # 109.
    #rake = 45.
    
    rakeA = rake + 45.
    rakeB = rake - 45.
    ####################
    nsx = 21
    nsy = 11
    Min_h = 10.
    flen  = 600. #Fault's length [km] along strike
    fwid  = 300. #Fault's width [km] along dip
    sflen = flen/float(nsx)
    sfwid = fwid/float(nsy)    
    swp = [1, 0, 2]
    nsf = nsx*nsy
    ###################
    
    t_h     = 10.
    MISFIT = np.array([])
    #RUPVEL = np.arange(1.0, 5.0, 0.05)
    RupVel = 2.1 # Best fit
    #RupVel = 2.25 #From Lay et al.
    
    
    #for RupVel in RUPVEL:
    print "****************************"
    print RupVel
    print "****************************"
    NP = [strike, dip, rake]
    NPA = [strike, dip, rakeA]
    NPB = [strike, dip, rakeB]
    
    M  = np.array(NodalPlanetoMT(NP))  
    MA = np.array(NodalPlanetoMT(NPA)) 
    MB = np.array(NodalPlanetoMT(NPB)) 
    
    Mp = np.sum(M**2)/np.sqrt(2)
    
    
    #############
        #Loading req file and EQparameters
    parameters={}    
    with open(argv[1],'r') as file:
        for line in file:
            line = line.split()
            key = line[0]
            val = line[1:]
            parameters[key] = val 
    #~ cmteplat = float(parameters['eplat'][0])
    #~ cmteplon = float(parameters['eplon'][0])
    #~ cmtepdepth=float(parameters['epdepth'][0])    
    orig = UTCDateTime(parameters['origin_time'][0])
    
    ####Hypocentre from
    ### http://earthquake.usgs.gov/earthquakes/eqinthenews/2010/us2010tfan/    
    cmteplat = -35.91#-35.85#-36.03#-35.83
    cmteplon = -72.73#-72.72#-72.83# -72.67
    cmtepdepth= 35.
    eq_hyp = (cmteplat,cmteplon,cmtepdepth)
      ############

    
    
    grid, sblt = fault_grid('CL-2010',cmteplat,cmteplon,cmtepdepth,0, Min_h,\
                             strike, dip, rake, flen, fwid, nsx, nsy,\
                             Verbose=False, ffi_io=True, gmt_io=True)
                             
    print ('CL-2010',cmteplat,cmteplon,cmtepdepth,0, Min_h,\
                             strike, dip, rake, flen, fwid, nsx, nsy,\
                            )
    print grid[0][1]
    #sys.exit()
    #############
    #Loading files and setting dirs:
    
    inputfile =  os.path.abspath(argv[1]) 
    if not os.path.exists(inputfile): print inputfile, "does not exist."; exit() 
    workdir = "/".join(inputfile.split("/")[:-1]) 
    basename = inputfile.split("/")[-1][:-4]
    if workdir[-1] != "/": workdir += "/"
    
    try :
        os.mkdir(workdir+"WPinv")
    except OSError:
        pass#print "Directory WPtraces already exists. Skipping"
    
    trfile = open(workdir+"goodtraces.dat")
    trlist = []
    #Loading Good traces files:
    while 1:
        line = trfile.readline().rstrip('\r\n')
        if not line: break        
        trlist.append(line.split()[0])        
    trfile.close()
    #############
    
    # Reading traces:    
    st = read(workdir+"WPtraces/" + basename + ".decov.trim.mseed")  
    #############################################################################
    ######Determining the sf closest to the hypocentre:    
    min_Dist_hyp_subf = flen *fwid
    for subf in range(nsf):
        sblat   = grid[subf][1]
        sblon   = grid[subf][0]
        sbdepth = grid[subf][2]              
        sf_hyp =  (sblat,sblon, sbdepth)        
        Dist_hyp_subf = hypo2dist(eq_hyp,sf_hyp)
        if Dist_hyp_subf < min_Dist_hyp_subf:
            min_Dist_hyp_subf = Dist_hyp_subf
            min_sb_hyp = sf_hyp
            hyp_subf = subf
    print hyp_subf,  min_sb_hyp,  min_Dist_hyp_subf    
    
    
    ####Determining trimming times:
    
    test_tr = read(GFdir + "H003.5/PP/GF.0001.SY.LHZ.SAC")[0]
    t0 = test_tr.stats.starttime
    TrimmingTimes = {}   # Min. Distance from the fault to each station. 
    A =0
    for trid in trlist:     
        tr = st.select(id=trid)[0]
        metafile = workdir + "DATA/" + "META." + tr.id + ".xml"
        META = DU.getMetadataFromXML(metafile)[tr.id]
        stlat = META['latitude']
        stlon = META['longitude'] 
        dist =   locations2degrees(min_sb_hyp[0],min_sb_hyp[1],\
                                   stlat,stlon) 
        parrivaltime = getTravelTimes(dist,min_sb_hyp[2])[0]['time']        
        ta = t0 + parrivaltime
        tb = ta + round(15.*dist) 
        TrimmingTimes[trid] = (ta, tb)
        
    
    ##############################################################################
     
    #####

    DIST = []
    # Ordering the stations in terms of distance
    for trid in trlist: 
        metafile = workdir + "DATA/" + "META." + trid + ".xml"
        META = DU.getMetadataFromXML(metafile)[trid]
        lat = META['latitude']
        lon = META['longitude']
        trdist = locations2degrees(cmteplat,cmteplon,lat,lon) 
        DIST.append(trdist)   

    
    DistIndex = lstargsort(DIST)
    
    if len(argv) == 3:
        trlist = [argv[2]]
        OneStation = True
    else:
        trlist = [trlist[i] for i in DistIndex]
        OneStation = False
        
   ##### 

    client = Client()
    ObservedDisp = np.array([])   
    gridlat = []
    gridlon = []
    griddepth = []
    sbarea = []
    mindist = flen*fwid # min distance hyp-subfault 
    

    ##########Loop for each subfault
    for subf in range(nsf):
        print "**********"
        print subf
        eplat   = grid[subf][1]
        eplon   = grid[subf][0]           
        epdepth = grid[subf][2]
        
        ## Storing the subfault's location centered on the hypocenter 
        gridlat.append(eplat-cmteplat)
        gridlon.append(eplon-cmteplon)
        griddepth.append(epdepth)
        
        
        strike = grid[subf][3] #+ 360.
        dip    = grid[subf][4]
        rake   = grid[subf][5] #     
        NP = [strike, dip, rake]
        
        M = np.array(NodalPlanetoMT(NP))   

        
                #Calculating the time delay:
            
        sf_hyp = (eplat,eplon, epdepth)        
        Dist_ep_subf = hypo2dist(eq_hyp,sf_hyp)
        t_d = round(Dist_ep_subf/RupVel) #-59.
        print eplat,eplon, epdepth
    
        #t_d  = 0.
        
    
        # Determining depth dir:
        depth = []
        depthdir = []
        for file in os.listdir(GFdir):
            if file[-2:] == ".5":
                depthdir.append(file)
                depth.append(float(file[1:-2]))            
        BestDirIndex = np.argsort(abs(epdepth-np.array(depth)))[0]      
        hdir = GFdir + depthdir[BestDirIndex] + "/"   
        # hdir is the absolute path to the closest depth. 
        
        
        
        SYN = np.array([])
        SYNA = np.array([])
        SYNB = np.array([])

        
        #Main loop :
        for trid in trlist:  
                       
            tr = st.select(id=trid)[0]    
            metafile = workdir + "DATA/" + "META." + tr.id + ".xml"
            META = DU.getMetadataFromXML(metafile)[tr.id]
            lat = META['latitude']
            lon = META['longitude']    
            trPPsy,  trRRsy, trRTsy,  trTTsy = \
                                   GFSelectZ(lat,lon,hdir) 
            
            tr.stats.delta = trPPsy.stats.delta
            azi =   -np.pi/180.*gps2DistAzimuth(lat,lon,\
                                               eplat,eplon)[2]
            trROT = MTrotationZ(azi, trPPsy,  trRRsy, trRTsy,  trTTsy)        
                        
            
                    #Triangle 
            dt = trROT[0].stats.delta          
            trianglen = 2.*t_h/dt-1.
            window = triang(trianglen)
            window /= np.sum(window)
            #window = np.array([1.])
            
            FirstValid = int(trianglen/2.) + 1
            dist =   locations2degrees(eplat,eplon,lat,lon) 
            parrivaltime = getTravelTimes(dist,epdepth)[0]['time']
            
            t1 = TrimmingTimes[trid][0] - t_d
            t2 = TrimmingTimes[trid][1] - t_d
            
   
            #~ t1 = trROT[0].stats.starttime + parrivaltime- t_d
            #~ t2 = t1+ round(MinDist[tr.id]*15. )
                           
           
            N = len(trROT[0])
            for trR in trROT:
                trR.data *= 10.**-21 ## To get M in Nm                   
                trR.data -= trR.data[0]
                AUX1 = len(trR)
                trR.data = convolve(trR.data,window,mode='valid') 
                AUX2 = len(trR)
                mean = np.mean(np.hstack((trR.data[0]*np.ones(FirstValid),\
                               trR.data[:60./trR.stats.delta*1.-FirstValid+1])))
                #mean = np.mean(trR.data[:60])
                trR.data -= mean      
                trR.data = bp.bandpassfilter(trR.data,len(trR), trR.stats.delta,\
                                             corners , 1 , fmin, fmax)  
                t_l = dt*0.5*(AUX1 - AUX2)                             
                trR.trim(t1-t_l,t2-t_l, pad=True, fill_value=trR.data[0])  #We lost t_h due to the convolution                  
            
            
         
            #~ for trR in trROT:
                #~ trR.data *= 10.**-23 ## To get M in Nm
                #~ trR.data -= trR.data[0]                
                #~ trR.data = convolve(trR.data,window,mode='same')
                #~ # mean = np.mean(np.hstack((trR.data[0]*np.ones(FirstValid),\
                             #~ # trR.data[:60./trR.stats.delta*1.-FirstValid+1])))
                #~ mean = np.mean(trR.data[:60])               
                #~ trR.data -= mean
                #~ trR.data = bp.bandpassfilter(trR.data,len(trR), trR.stats.delta,\
                                             #~ corners ,1 , fmin, fmax)
                #~ trR.trim(t1,t2,pad=True, fill_value=trR.data[0])  

            nmin = min(len(tr.data),len(trROT[0].data))             
            tr.data = tr.data[:nmin]
            for trR in trROT:
                trR.data = trR.data[:nmin]
              
                
             #############            
            trROT = np.array(trROT)  
            syn  =  np.dot(trROT.T,M) 
            synA =  np.dot(trROT.T,MA)
            synB =  np.dot(trROT.T,MB)
            
            SYN = np.append(SYN,syn)  
            SYNA = np.append(SYNA,synA)
            SYNB = np.append(SYNB,synB)
            
            if subf == 0 : ObservedDisp =  np.append(ObservedDisp,tr.data,0) 
            
  
        sbarea.append(grid[subf][6])
   
        print np.shape(A), np.shape(np.array([SYN]))
        if subf == 0: 
            A  = np.array([SYN])
            AA = np.array([SYNA])
            AB = np.array([SYNB])
        else:
            A = np.append(A,np.array([SYN]),0) 
            AA = np.append(AA,np.array([SYNA]),0)
            AB = np.append(AB,np.array([SYNB]),0)
        
    
    
    #Full matrix with the two rake's component
    AC = np.vstack((AA,AB))

#MISFIT = np.array([])
########## Stabilizing the solution:         

#### Moment minimization:
#~ constraintD  = np.zeros(nsf)
#~ ObservedDispcons = np.append(ObservedDisp,constraintD)
#~ for lbd in Lbd:
    #~ constraintF  = lbd*np.eye(nsf,nsf)         
    #~ Acons = np.append(A,constraintF,1)   
    #~ print np.shape(Acons.T), np.shape(ObservedDispcons)
    #~ R = nnls(Acons.T,ObservedDispcons)
    #~ M = R[0]
    #~ #M = np.zeros(nsf)
    #~ #M[::2] = 1
    #~ fit = np.dot(A.T,M)
    #~ misfit = 100.*np.sum(np.abs(fit-ObservedDisp))\
             #~ /np.sum(np.abs(ObservedDisp))
    
    #~ MISFIT = np.append(MISFIT,misfit)
#~ plt.figure()
#~ plt.plot(Lbd,MISFIT)
#~ ###########################################
#~ ### Smoothing:
#~ constraintF_base = SmoothMatrix(nsx,nsy)
#~ constraintD  = np.zeros(np.shape(constraintF_base)[0])
#~ ObservedDispcons = np.append(ObservedDisp,constraintD)
#~ for lbd in Lbd:
    #~ constraintF  = lbd*constraintF_base   
    #~ Acons = np.append(A,constraintF.T,1)   
    #~ #print np.shape(Acons.T), np.shape(ObservedDispcons)
    #~ R = nnls(Acons.T,ObservedDispcons)
    #~ M = R[0]
    #~ fit = np.dot(A.T,M)
    #~ misfit = 100.*np.sum(np.abs(fit-ObservedDisp))\
             #~ /np.sum(np.abs(ObservedDisp))
    #~ print lbd, misfit
    #~ MISFIT = np.append(MISFIT,misfit)
#~ ###########################################    
###########################################
#~ ##### Moment Minimization (including rake projections):
#~ constraintD  = np.zeros(2*nsf)
#~ ObservedDispcons = np.append(ObservedDisp,constraintD)
#~ for lbd in Lbd:
    #~ constraintF  = lbd*np.eye(2*nsf,2*nsf)         
    #~ ACcons = np.append(AC,constraintF,1)   
    #~ print np.shape(ACcons.T), np.shape(ObservedDispcons)
    #~ R = nnls(ACcons.T,ObservedDispcons)
    #~ M = R[0]
    #~ fit = np.dot(AC.T,M)
    #~ misfit = 100.*np.sum(np.abs(fit-ObservedDisp))\
             #~ /np.sum(np.abs(ObservedDisp))        
    #~ MISFIT = np.append(MISFIT,misfit)  
    #~ M = np.sqrt(M[:nsf]**2+M[nsf:]**2)

##############################################
### Smoothing (including rake projections):
#~ constraintF_base = SmoothMatrix(nsx,nsy)
#~ Nbase = np.shape(constraintF_base)[0]
#~ constraintD  = np.zeros(2*Nbase)
#~ constraintF_base_big = np.zeros((2*Nbase, 2*nsf))
#~ constraintF_base_big[:Nbase,:nsf]= constraintF_base
#~ constraintF_base_big[Nbase:,nsf:]= constraintF_base 
#~ ObservedDispcons = np.append(ObservedDisp,constraintD)
#~ for lbd in Lbd:
    #~ constraintF  = lbd*constraintF_base_big   
    #~ ACcons = np.append(AC,constraintF.T,1)   
    #~ #print np.shape(Acons.T), np.shape(ObservedDispcons)
    #~ R = nnls(ACcons.T,ObservedDispcons)
    #~ M = R[0]
    #~ fit = np.dot(AC.T,M)
    #~ misfit = 100.*np.sum(np.abs(fit-ObservedDisp))\
             #~ /np.sum(np.abs(ObservedDisp))
    #~ print lbd, misfit
    #~ MISFIT = np.append(MISFIT,misfit)
#~ M = np.sqrt(M[:nsf]**2+M[nsf:]**2)    
###########################################    
#~ ##### Moment Minimization and  Smoothing
  #~ #### (including rake projections):
    #~ mom0 = []
    #~ constraintF_base = SmoothMatrix(nsx,nsy)
    #~ Nbase = np.shape(constraintF_base)[0]
    #~ constraintDsmoo = np.zeros(2*Nbase)
    #~ constraintDmin  = np.zeros(2*nsf)
    #~ constraintF_base_big = np.zeros((2*Nbase, 2*nsf))
    #~ constraintF_base_big[:Nbase,:nsf]= constraintF_base
    #~ constraintF_base_big[Nbase:,nsf:]= constraintF_base 
    #~ ObservedDispcons = np.concatenate((ObservedDisp,
                                  #~ constraintDmin,
                             #~ constraintDsmoo  ))    
   
    #~ for lbdm0 in Lbdm0min:
        #~ constraintFmin  = lbdm0*np.eye(2*nsf,2*nsf)
        #~ for lbdsm in Lbdsmooth:              
            #~ constraintFsmoo  = lbdsm*constraintF_base_big 
            #~ ACcons = np.hstack((AC, constraintFmin, constraintFsmoo.T))   
            #~ print lbdm0, lbdsm
            #~ R = nnls(ACcons.T,ObservedDispcons)
            #~ M = R[0]
            #~ fit = np.dot(AC.T,M)
            #~ misfit = 100.*np.sum(np.abs(fit-ObservedDisp))\
                     #~ /np.sum(np.abs(ObservedDisp))        
            #~ MISFIT = np.append(MISFIT,misfit) 
            #~ MA = M[:nsf]
            #~ MB = M[nsf:]
            #~ M = np.sqrt(MA**2+MB**2)
            #~ mom0.append(np.sum(M))
    ##############################################

    # Rotation to the rake's conventional angle:  
    #MB, MA = Rot2D(MB,MA,-rakeB)
    print np.shape(M), np.shape(A.T)
    R = nnls(A.T,ObservedDisp)
    M = R[0]
    
    #~ M = np.zeros(nsf)
    #~ M[::2] = 1 
    fit = np.dot(A.T,M)
    MA = M
    MB = M
    
    np.save("RealSol", M)
      
    nm0 = np.size(Lbdm0min) 
    nsmth = np.size(Lbdsmooth)
    #~ plt.figure()
    #~ plt.pcolor(1./Lbdsmooth, 1./Lbdm0min,MISFIT.reshape(nm0,nsmth))
    #~ plt.xlabel(r'$1/ \lambda_{2}$', fontsize = 24)
    #~ plt.ylabel(r'$1/ \lambda_{1}$',fontsize = 24 )
    #~ plt.ylim((1./Lbdm0min).min(),(1./Lbdm0min).max() )
    #~ plt.ylim((1./Lbdsmooth).min(),(1./Lbdsmooth).max() )
    #~ cbar = plt.colorbar()
    #~ cbar.set_label("Misfit %")
    #~ print np.shape(Lbdm0min), np.shape(mom0)
    
    #~ plt.figure()
    #~ CS = plt.contour(1./Lbdsmooth, 1./Lbdm0min,MISFIT.reshape(nm0,nsmth) )
    #~ plt.xlabel(r'$1/ \lambda_{2}$', fontsize = 24)
    #~ plt.ylabel(r'$1/ \lambda_{1}$',fontsize = 24 )
    #~ plt.clabel(CS, inline=1, fontsize=10)
    #~ plt.title('Misfit')
    
    
    
    #~ plt.figure()
    #~ plt.plot(1./Lbdm0min,MISFIT)
    #~ plt.xlabel(r'$1/ \lambda_{2}$', fontsize = 24)
    #~ plt.ylabel("Misfit %")
    #~ plt.figure()
    #~ plt.plot(Lbdm0min,mom0)
    #~ plt.ylabel(r'$M_0\, [Nm]$', fontsize = 24)
    #~ plt.xlabel(r'$\lambda_{M0}$', fontsize = 24)

   
    misfit = 100.*np.sum(np.abs(fit-ObservedDisp))/np.sum(np.abs(ObservedDisp))
    print "Residual: ", 1000.*R[1]
    print misfit
    
    
    #SLIP = M*Mp/mu/(1.e6*np.array(sbarea))
    
    sbarea = sflen*sfwid
    SLIP = M/(mu*1.e6*sbarea)
    SLIP = SLIP.reshape(nsx,nsy).T[::-1]
    moment = M.reshape(nsx,nsy).T[::-1]
    
    plt.figure(figsize = (13,5))
    plt.plot(fit,'b' ,label="Fit")
    plt.plot(ObservedDisp,'r',label="Observed")
    plt.xlabel("Time [s]")
    plt.ylabel("Displacement [m]")
    plt.legend()
    
    
    np.set_printoptions(linewidth=1000,precision=3)
    print "***********"
    print sbarea
    print SLIP
    print np.mean(SLIP)
    print "Moment:"
    print np.sum(M)
 

    ### SLIPS Distribution (as the synthetics) :
    SLIPS = M.reshape(nsx,nsy).T
    SLIPS /=  mu*1.e6*sbarea
    
    
    #~ #########Ploting slip distribution:
    #~ #we are going to reflect the y axis later, so:
    hypsbloc = [hyp_subf / nsy , -(hyp_subf % nsy) - 2]

    #Creating the strike and dip axis:
    StrikeAx= np.linspace(0,flen,nsx+1)
    DipAx= np.linspace(0,fwid,nsy+1)
    DepthAx = DipAx*np.sin(np.pi/180.*dip) + Min_h
    print DepthAx
    hlstrike = StrikeAx[hypsbloc[0]] + sflen*0.5
    #we are going to reflect the axis later, so:
    hldip = DipAx[hypsbloc[1]] + sfwid*0.5 
    hldepth = DepthAx[hypsbloc[1]] + sfwid*0.5*np.sin(np.pi/180.*dip)    
    
    StrikeAx = StrikeAx - hlstrike
    DipAx =     DipAx   - hldip
    
    XX, YY = np.meshgrid(StrikeAx, DepthAx)
    XX, ZZ = np.meshgrid(StrikeAx, DipAx )  

   ######Plot: (Old colormap: "gist_rainbow_r")
    plt.figure(figsize = (13,6))
    ax = host_subplot(111)
    im = ax.pcolor(XX, YY, SLIPS, cmap="jet")    
    ax.set_ylabel('Depth [km]')       
    ax.set_ylim(DepthAx[-1],DepthAx[0])  
    
    # Creating a twin plot 
    ax2 = ax.twinx()
    im2 = ax2.pcolor(XX, ZZ, SLIPS[::-1,:], cmap="jet")    
    ax2.set_ylabel('Distance along the dip [km]')
    ax2.set_xlabel('Distance along the strike [km]')    
    ax2.set_ylim(DipAx[0],DipAx[-1])
    ax2.set_xlim(StrikeAx[0],StrikeAx[-1])       
                         
                         
    ax.axis["bottom"].major_ticklabels.set_visible(False) 
    ax2.axis["bottom"].major_ticklabels.set_visible(False)
    ax2.axis["top"].set_visible(True)
    ax2.axis["top"].label.set_visible(True)
    
    
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)
    cb = plt.colorbar(im, cax=cax, orientation="horizontal")
    cb.set_label("Slip [m]")             
    ax2.plot([0], [0], '*', ms=225./(nsy+4))
    ax2.set_xticks(ax2.get_xticks()[1:-1])

    
    #~ ### Rake plot:
    plt.figure(figsize = (13,6))
    fig = host_subplot(111)
    XXq, ZZq = np.meshgrid(StrikeAx[:-1]+sflen, DipAx[:-1]+sfwid )
    Q = plt.quiver(XXq,ZZq, MB.reshape(nsx,nsy).T[::-1,:]/(mu*1.e6*sbarea), 
                    MA.reshape(nsx,nsy).T[::-1,:]/(mu*1.e6*sbarea),
                    SLIPS[::-1,:],
                units='xy',scale = 0.5  ,  linewidths=(2,), 
                edgecolors=('k'), headaxislength=5  )
    fig.set_ylim([ZZq.min()-80,ZZq.max()+80])
    fig.set_xlim([XXq.min()-20, XXq.max()+20 ])
    fig.set_ylabel('Distance along dip [km]') 
    fig.set_xlabel('Distance along the strike [km]') 
    
    fig2 = fig.twinx()
    fig2.set_xlabel('Distance along the strike [km]') 
    
    fig.axis["bottom"].major_ticklabels.set_visible(False) 
    fig.axis["bottom"].label.set_visible(False)
    fig2.axis["top"].set_visible(True)
    fig2.axis["top"].label.set_visible(True)
    fig2.axis["right"].major_ticklabels.set_visible(False)

    divider = make_axes_locatable(fig)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)
    cb = plt.colorbar(im, cax=cax, orientation="horizontal")
    cb.set_label("Slip [m]") 
    

    
    
    plt.show()

    
        #############
    #~ print np.shape(MISFIT),  np.shape(RUPVEL)
    #~ plt.figure()
    #~ plt.plot(RUPVEL,MISFIT)
    #~ plt.xlabel("Rupture Velocity [km/s]")
    #~ plt.ylabel("Misfit %")
    #~ plt.show()
     

    print np.shape(MB.reshape(nsx,nsy).T)
    print np.shape(ZZ)
Example #36
0
def main(argv=sys.argv): 
    
    #Earth's parameters 
    #~ beta = 4.e3 #m/s 
    #~ rho = 3.e3 #kg/m^3 
    #~ mu = rho*beta*beta
    
    PLotSt = ["IU.TRQA.00.LHZ",
             "IU.LVC.00.LHZ",
             "II.NNA.00.LHZ",
              "IU.RAR.00.LHZ"]
             
             
    #PlotSubf = [143, 133, 123, 113, 103, 93,
     #           83, 73, 63, 53]
    PlotSubf = [6,3]

    
    
    #Set rup_vel = 0 to have a point source solution
    RupVel = 2.1 #Chilean eq from Lay et al
    t_h     = 10. # Half duration for each sf  
    noiselevel = 0.0 # L1 norm level of noise
    mu =40e9
    #W-Phase filter 
    corners = 4.
    fmin = 0.001
    fmax = 0.005
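    # 1-5 mHz is roughly the long-period band commonly used for W-phase analysis.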
    
    ### Data from Chilean 2010 EQ (Same as W phase inv.) 
    strike = 18.
    dip    = 18.
    rake   = 104. # 109.
    
    rakeA = rake + 45.
    rakeB = rake - 45.
    
    
    ### Fault's grid parameters
    nsx   = 21 #Number of sf along strike
    nsy   = 11 #Number of sf along dip
    flen  = 600. #Fault length [km] along strike
    fwid  = 300. #Fault width [km] along dip
    direc = 0    #Directivity 0 = bilateral
    Min_h = 10.  #Min depth of the fault
    
    
    ### Derived parameters:
    nsf = nsx*nsy
    sflen = flen/float(nsx)         
    sfwid = fwid/float(nsy)
    swp = [1, 0, 2] # useful to swap (lat,lon, depth)  
    mindist = flen*fwid # minimum distance to the hypocentre (initial value)
    
    ###Chessboard
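    # "Chessboard" weights: every other subfault (in the flattened index) gets unit weight,
    # the rest zero, giving a known alternating slip pattern for the synthetic test.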
    #weight = np.load("RealSol.npy") 
    weight = np.zeros(nsf)
    weight[::2] = 1 
    #weight[::2] = 1 
    #~ weight[10]=15
    #~ weight[5001]=10
    #~ weight[3201]=2
    
    
    
    ## Setting dirs and reading files.
    GFdir = "/home/roberto/data/GFS/"
    workdir = os.path.abspath(".")+"/"
    datadir = workdir + "DATA/"
    tracesfilename = workdir + "goodtraces.dat"
    tracesdir = workdir + "WPtraces/"
    
    try:
        reqfilename    = glob.glob(workdir + '*.syn.req')[0]
    except IndexError:   
        print "There is not *.syn.req file in the dir"
        sys.exit()
    
    basename = reqfilename.split("/")[-1][:-4]
    
    if not os.path.exists(tracesfilename): 
        print tracesfilename, "does not exist."
        exit()
    
    if not os.path.exists(datadir):
            os.makedirs(datadir)
    
    if not os.path.exists(tracesdir):
            os.makedirs(tracesdir)
 
    tracesfile = open(tracesfilename)    
    reqfile =  open(reqfilename)    
    
    trlist = readtraces(tracesfile)
    eqdata = readreq(reqfile)    

    tracesfile.close()
    reqfile.close()   
    
    ####Hypocentre from
    ### http://earthquake.usgs.gov/earthquakes/eqinthenews/2010/us2010tfan/    
    cmteplat = -35.91#-35.85#-36.03#-35.83
    cmteplon = -72.73#-72.72#-72.83# -72.67
    cmtepdepth= 35.
    eq_hyp = (cmteplat,cmteplon,cmtepdepth)
    
    
      ############
    

    # Defining the sf system
    grid, sblt = fault_grid('CL-2010',cmteplat,cmteplon,
                            cmtepdepth, direc,
                            Min_h, strike, dip, rake, flen,fwid ,nsx,nsy,
                            Verbose=False,ffi_io=True,gmt_io=True)
    
    print ('CL-2010',cmteplat,cmteplon,
                            cmtepdepth, direc,
                            Min_h, strike, dip, rake, flen,fwid ,nsx,nsy)
    print grid[0][1]
    #sys.exit()
    #This calculation is inside of the loop
    #~ NP = [strike, dip, rake]
    #~ M = np.array(NodalPlanetoMT(NP))  
    #~ Mp = np.sum(M**2)/np.sqrt(2)    
     
    #############################################################################
    ######Determining the sf closest to the hypocentre:    
    min_Dist_hyp_subf = flen *fwid
    for subf in range(nsf):
        sblat   = grid[subf][1]
        sblon   = grid[subf][0]
        sbdepth = grid[subf][2]              
        sf_hyp =  (sblat,sblon, sbdepth)        
        Dist_hyp_subf = hypo2dist(eq_hyp,sf_hyp)
        if Dist_hyp_subf < min_Dist_hyp_subf:
            min_Dist_hyp_subf = Dist_hyp_subf
            min_sb_hyp = sf_hyp
            hyp_subf = subf
    ####Determining trimming times:    
    test_tr = read(GFdir + "H003.5/PP/GF.0001.SY.LHZ.SAC")[0]
    t0 = test_tr.stats.starttime
    TrimmingTimes = {}   # Trimming window (start, end) per station.
    A =0
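    # Trimming window per station: start at the first (P) arrival from the subfault closest
    # to the hypocentre and extend it by roughly 15 s per degree of distance.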
    for trid in trlist:     
        metafile = workdir + "DATA/" + "META." + trid + ".xml"
        META = DU.getMetadataFromXML(metafile)[trid]
        stlat = META['latitude']
        stlon = META['longitude'] 
        dist =   locations2degrees(min_sb_hyp[0],min_sb_hyp[1],\
                                   stlat,stlon) 
        parrivaltime = getTravelTimes(dist,min_sb_hyp[2])[0]['time']        
        ta = t0 + parrivaltime
        tb = ta + round(15.*dist) 
        TrimmingTimes[trid] = (ta, tb)
        
    
    ###########################

      
    
    DIST = []
    # Ordering the stations in terms of distance
    for trid in trlist: 
        metafile = workdir + "DATA/" + "META." + trid + ".xml"
        META = DU.getMetadataFromXML(metafile)[trid]
        lat = META['latitude']
        lon = META['longitude']
        trdist = locations2degrees(cmteplat,
                                   cmteplon,lat,lon) 
        DIST.append(trdist)   

    DistIndex = lstargsort(DIST)
    trlist = [trlist[i] for i in DistIndex]
  
    stdistribution = StDistandAzi(trlist, eq_hyp , workdir + "DATA/")
    StDistributionPlot(stdistribution)
    #exit()
    #Main loop
   

 

        
    for subf in range(nsf):
        print subf
        sflat   = grid[subf][1]
        sflon   = grid[subf][0]           
        sfdepth = grid[subf][2]
        #~ strike = grid[subf][3] #+ 360.
        #~ dip    = grid[subf][4]
        #~ rake   = grid[subf][5] #     
        NP = [strike, dip, rake]  
        NPA = [strike, dip, rakeA]
        NPB = [strike, dip, rakeB]        


        
        M = np.array(NodalPlanetoMT(NP))   
        MA = np.array(NodalPlanetoMT(NPA)) 
        MB = np.array(NodalPlanetoMT(NPB)) 
        #Time delay is calculated as the time at which
        #the rupture reaches the subfault
            
        sf_hyp = (sflat, sflon, sfdepth) 
        Dist_ep_subf = hypo2dist(eq_hyp,sf_hyp)
        
        if Dist_ep_subf < mindist:
            mindist = Dist_ep_subf
            minsubf = subf
        
                
        if RupVel == 0:
            t_d = eqdata['time_shift']
        else:
            t_d = round(Dist_ep_subf/RupVel) #-59.
       
        print sflat, sflon, sfdepth
        # Looking for the best depth dir:
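        # Green's functions appear to be pre-computed per source depth in directories named
        # like "H003.5"; pick the directory whose depth is closest to this subfault's depth.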
        depth = []
        depthdir = []
        for file in os.listdir(GFdir):
            if file[-2:] == ".5":
                depthdir.append(file)
                depth.append(float(file[1:-2]))            
        BestDirIndex = np.argsort(abs(sfdepth\
                                  - np.array(depth)))[0]      
        hdir = GFdir + depthdir[BestDirIndex] + "/"     
        
        ###

        SYN = np.array([])
        SYNA = np.array([])
        SYNB = np.array([])
        for trid in trlist:     
            
            metafile = workdir + "DATA/" + "META." + trid + ".xml"
            META = DU.getMetadataFromXML(metafile)[trid]
            lat = META['latitude']
            lon = META['longitude']  
            
            #Subfault loop               
            #GFs Selection:
            ##Change to the following loop
            
            dist = locations2degrees(sflat,sflon,lat,lon)                                
            azi =  -np.pi/180.*gps2DistAzimuth(lat,lon,
                       sflat,sflon)[2] 
            trPPsy,  trRRsy, trRTsy,  trTTsy = \
                                       GFSelectZ(hdir,dist)          
            
            
 
            
            trROT =  MTrotationZ(azi, trPPsy,  trRRsy, trRTsy,  trTTsy) 
            orig = trROT[0].stats.starttime  
            dt = trROT[0].stats.delta                       

            trianglen = 2*int(t_h/dt) - 1
            FirstValid = int(trianglen/2.) + 1 # ~half the window; samples affected by the 'valid' convolution
            window = triang(trianglen)
            window /= np.sum(window)
            #window = np.array([1.])
            
      
            
            
            parrivaltime = getTravelTimes(dist,sfdepth)[0]['time']
            
            t1 = TrimmingTimes[trid][0] - t_d
            t2 = TrimmingTimes[trid][1] - t_d
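            # Process each rotated GF component: scale to N*m, remove the initial offset,
            # convolve with the triangular source time function, demean over the first ~60 s,
            # band-pass in the W-phase band, and trim to the station window (shifted by t_l
            # to compensate for the samples lost in the 'valid' convolution).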
            
            
            
            for trR in trROT:
                trR.data *= 10.**-21 ## To get M in Nm                   
                trR.data -= trR.data[0]
                AUX1 = len(trR)
                trR.data = convolve(trR.data,window,mode='valid') 
                AUX2 = len(trR)
                mean = np.mean(np.hstack((trR.data[0]*np.ones(FirstValid),\
                               trR.data[:int(60./trR.stats.delta) - FirstValid + 1])))
                #mean = np.mean(trR.data[:60])
                trR.data -= mean      
                trR.data = bp.bandpassfilter(trR.data,len(trR), trR.stats.delta,\
                                             corners , 1 , fmin, fmax)  
                t_l = dt*0.5*(AUX1 - AUX2)                             
                trR.trim(t1-t_l,t2-t_l, pad=True, fill_value=trR.data[0])  #We lost t_h due to the convolution        
            


                   
            #~ for trR in trROT:
                #~ trR.data *= 10.**-23 ## To get M in Nm                   
                #~ trR.data -= trR.data[0]
 
                #~ trR.data = convolve(trR.data,window,mode='same') 

                #~ #mean = np.mean(np.hstack((trR.data[0]*np.ones(FirstValid),\
                               #~ #trR.data[:60./trR.stats.delta*1.-FirstValid+1])))
                #~ mean = np.mean(trR.data[:60])
                #~ trR.data -= mean      
                #~ trR.data = bp.bandpassfilter(trR.data,len(trR), trR.stats.delta,\
                                             #~ corners , 1 , fmin, fmax)  
                            
                #~ trR.trim(t1,t2,pad=True, fill_value=trR.data[0])     
           
            trROT = np.array(trROT)  
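            # Synthetics as linear combinations of the rotated GF traces, weighted by the
            # moment tensor components of each nodal plane (M, MA, MB).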
            syn  =  np.dot(trROT.T,M) 
            synA =  np.dot(trROT.T,MA)
            synB =  np.dot(trROT.T,MB)
            
            SYN = np.append(SYN,syn)  
            SYNA = np.append(SYNA,synA)
            SYNB = np.append(SYNB,synB)
            
            
        print np.shape(A), np.shape(np.array([SYN]))    
        if subf == 0: 
            A = np.array([SYN])
            AA = np.array([SYNA])
            AB = np.array([SYNB])
        else:
            A = np.append(A,np.array([SYN]),0)    
            AA = np.append(AA,np.array([SYNA]),0)
            AB = np.append(AB,np.array([SYNB]),0)
            
            
            
    AC = np.vstack((AA,AB))
    print np.shape(AC)
    print np.shape(weight)
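    # A has one row per subfault (columns are the concatenated station samples), so
    # B = A.T dot weight is the synthetic wavefield for the chessboard slip model;
    # Ntraces below records which slice of B belongs to each station trace.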
    B = np.dot(A.T,weight)
    stsyn = Stream()
    n = 0
    Ntraces= {}
    for trid in trlist: 
        spid = trid.split(".")        
        print trid
        NMIN = int(1 + (TrimmingTimes[trid][1] - TrimmingTimes[trid][0]) / dt)
        Ntraces[trid] = (n, NMIN + n)
        trsyn = Trace(B[n:NMIN+n])
        n += NMIN
        trsyn.stats.network = spid[0]
        trsyn.stats.station = spid[1]
        trsyn.stats.location = spid[2]
        trsyn.stats.channel = spid[3] 
        trsyn = AddNoise(trsyn,level = noiselevel)
        #trsyn.stats.starttime = 
        stsyn.append(trsyn)
        
       
    stsyn.write(workdir+"WPtraces/" + basename + ".decov.trim.mseed",
                 format="MSEED")           
                
    #####################################################    
    # Plotting:
    #####################################################
    #we are going to reflect the y axis later, so:
    print minsubf
    hypsbloc = [minsubf / nsy , -(minsubf % nsy) - 2]

    #Creating the strike and dip axis:
    StrikeAx= np.linspace(0,flen,nsx+1)
    DipAx= np.linspace(0,fwid,nsy+1)
    DepthAx = DipAx*np.sin(np.pi/180.*dip) + Min_h    
    hlstrike = StrikeAx[hypsbloc[0]] + sflen*0.5
        
    hldip = DipAx[hypsbloc[1]] + sfwid*0.5 
    hldepth = DepthAx[hypsbloc[1]] + sfwid*0.5*np.sin(np.pi/180.*dip)
       
    StrikeAx = StrikeAx - hlstrike
    DipAx =     DipAx   - hldip
 

    
    XX, YY = np.meshgrid(StrikeAx, DepthAx)
    XX, ZZ = np.meshgrid(StrikeAx, DipAx )

   
    sbarea = sflen*sfwid
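    # Convert the chessboard weights (interpreted as subfault moments in N*m) to slip [m]
    # for plotting: D = M0/(mu*A), with sbarea in km^2 (hence the 1.e6 factor).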
    
    SLIPS = weight.reshape(nsx,nsy).T#[::-1,:]
    SLIPS /= mu*1.e6*sbarea
    
    ######Plot:#####################
    plt.figure()
    ax = host_subplot(111)
    im = ax.pcolor(XX, YY, SLIPS, cmap="jet")    
    ax.set_ylabel('Depth [km]')       
    ax.set_ylim(DepthAx[-1],DepthAx[0])  
    
    # Creating a twin plot 
    ax2 = ax.twinx()
    #im2 = ax2.pcolor(XX, ZZ, SLIPS[::-1,:], cmap="Greys") 
    im2 = ax2.pcolor(XX, ZZ, SLIPS[::-1,:], cmap="jet")    
    
    ax2.set_ylabel('Distance along the dip [km]')
    ax2.set_xlabel('Distance along the strike [km]')    
    ax2.set_ylim(DipAx[0],DipAx[-1])
    ax2.set_xlim(StrikeAx[0],StrikeAx[-1])       
                         
                         
    ax.axis["bottom"].major_ticklabels.set_visible(False) 
    ax2.axis["bottom"].major_ticklabels.set_visible(False)
    ax2.axis["top"].set_visible(True)
    ax2.axis["top"].label.set_visible(True)
    
    
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)
    cb = plt.colorbar(im, cax=cax, orientation="horizontal")
    cb.set_label("Slip [m]") 
    ax2.plot([0], [0], '*', ms=225./(nsy+4))
    ax2.set_xticks(ax2.get_xticks()[1:-1])
    #ax.set_yticks(ax.get_yticks()[1:])
    #ax2.set_yticks(ax2.get_yticks()[:-1])
    

    
    #########Plotting the selected traces:
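    # Grid of subplots: one column per station in PLotSt, one row per subfault in PlotSubf;
    # the first pass just finds common y-limits per station so the rows share a scale.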
    nsp = len(PLotSt) * len(PlotSubf)
    plt.figure(figsize=(13,11))
    plt.title("Synthetics for rake = " + str(round(rake)))
    mindis = []
    maxdis = []
    for i, trid in enumerate(PLotSt):   
        x = np.arange(0,Ntraces[trid][1]-Ntraces[trid][0],
                      dt)
        for j, subf in enumerate(PlotSubf):
            y = A[subf, Ntraces[trid][0]:Ntraces[trid][1]]
            if j == 0:
                yy = y
            else:
                yy = np.vstack((yy,y))        
        maxdis.append(np.max(yy))
        mindis.append(np.min(yy))
        
    

    for i, trid in enumerate(PLotSt):   
        x = np.arange(0,Ntraces[trid][1]-Ntraces[trid][0],
                      dt)

        for j, subf in enumerate(PlotSubf):
            y = A[subf, Ntraces[trid][0]:Ntraces[trid][1]]
            plt.subplot2grid((len(PlotSubf), len(PLotSt)),
                              (j, i))                                
            plt.plot(x,y, linewidth=2.5)
            if j == 0:
                plt.title(trid)
            fig = plt.gca()            
            fig.axes.get_yaxis().set_ticks([])
            fig.set_ylabel(str(subf),rotation=0)
            fig.set_xlim((x[0],x[-1]))
            fig.set_ylim((mindis[i],maxdis[i]))
            if subf != PlotSubf[-1]:
                fig.axes.get_xaxis().set_ticks([])

    
    plt.show()
import DataUtils as util


def checkEncoding(inputText, code, outputText):
    if inputText == outputText:
        print "OK -", inputText
    else:
        print "WRONG - input:", inputText, "- code:", code, "- output:", outputText


# Test encoding of order
array = ["chain-prev", "bfs", "chain", "bfs-prev"]
for order in array:
    code = util.encode_order(order)
    order2 = util.decode_order(code)
    checkEncoding(order, code, order2)

# Test encoding of saturation
array = ["sat-like", "none", "sat-loop"]
for sat in array:
    code = util.encode_sat(sat)
    sat2 = util.decode_sat(code)
    checkEncoding(sat, code, sat2)

# Test encoding of saturation granularity
array = ["40", "1", "20", "2147483647", "80", "20", "5", ""]
for gran in array:
    code = util.encode_gran(gran)
    gran2 = util.decode_gran(code)
    checkEncoding(gran, code, gran2)
import DataUtils

print DataUtils.removeMultipleRows(range(9), [1,3,5])
print DataUtils.removeMultipleRows(range(9), [0,1,8])
print DataUtils.removeMultipleRows(range(9), [4,5,6,7])

matrix = [range(5), range(5,10), range(10,15), range(15,20)]

print DataUtils.removeMultipleColumns(matrix, [0,4])
print DataUtils.removeMultipleColumns(matrix, [3])
print DataUtils.removeMultipleColumns(matrix, [1,2,3])
print DataUtils.removeMultipleColumns(matrix, [0,2,4])
import DataUtils
from sklearn import svm
import Metrics
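# inboxLabels collapses each 3-element label [a, b, c] into a single integer code
# 100*a + 10*b + c, presumably so each label combination can be treated as one class.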

def inboxLabels(labels):
		for row in range(len(labels)):
			labels[row] = 100*labels[row][0] + 10*labels[row][1] + labels[row][2]
		return labels
	
missingCount = 4

for setNo in range(3,6):
	setNo = str(setNo)
	# Import set setNo
	dataSet = DataUtils.read_dataset(directory +"Set-"+ setNo +"-train.csv")
	testSet = DataUtils.read_dataset(directory +"Set-"+ setNo +"-validate.csv")
	names = dataSet.names
	features = dataSet.features

	# Inbox labels
	labels = inboxLabels(dataSet.labels)
	testLabels = inboxLabels(testSet.labels)

	missingCombinations = DataUtils.nPrCombinations(range(len(names)), missingCount) 
	
	print "SET", setNo
	
	for missingColumns in missingCombinations:
		# Print omitted features
		#for col in missingColumns:
# Prepare training data

directory = "/home/richard/Project/masterproject_benchmark/R_source/Models/Time/"
setNo = "2"

import DataUtils
dataSet = DataUtils.read_dataset(directory +"Set-"+ setNo +"-train.csv")
features = dataSet.features
labels = dataSet.labels

# Select classifier
from sklearn import tree
from sklearn import neighbors
#classifier = tree.DecisionTreeClassifier()
classifier = neighbors.KNeighborsClassifier(n_neighbors=6, weights="uniform")
#classifier = neighbors.RadiusNeighborsClassifier(radius=100000000.0)

# Train classifier
classifier = classifier.fit(features, labels)

# Test classifier
testSet = DataUtils.read_dataset(directory +"Set-"+ setNo +"-validate.csv")
predictions = classifier.predict(testSet.features)

# Write results to csv file
results = DataUtils.ResultSet(testSet.id, predictions, testSet.labels)
DataUtils.write_resultset(results, directory +"Result-"+ setNo +".csv")

dim = len(predictions[0])

# Determine metrics based on confusion matrix
Example #41
0
def getPriceVectorDescriptor():
	return DUtils.getPVD()
Example #42
0
def getPriceVector(cardNumber,useRelevant):
    return DUtils.parseIntoPriceOnlyList(cardNumber,useRelevant)