Esempio n. 1
0
def open_desc(tfile):
    """Collect the JSON-decoded ``*desc`` records stored in a cdb file.

    Args:
        tfile: Value handed to ``utils.open_cdb`` to open the database.

    Returns:
        A list of ``(record, category)`` tuples, one for every key that ends
        with ``"desc"``. ``record`` is the JSON-decoded UTF-8 value stored
        under that key; ``category`` is the value stored under
        ``"description"`` and is the same object in every tuple.
    """
    database = utils.open_cdb(tfile)
    all_keys = utils.get_all_keys(database)
    category = database["description"]
    # Keys are handled as text but the store is indexed with UTF-8 bytes.
    return [
        (json.loads(str(database[bytes(name, 'utf-8')], 'utf-8')), category)
        for name in all_keys
        if name.endswith("desc")
    ]
Esempio n. 2
0
def score_cdb(tfile, cats, keywords, cl):
    """Score every ``*desc`` record of a cdb file by category and keywords.

    Args:
        tfile: Value handed to ``utils.open_cdb`` to open the database.
        cats: Passed through to ``check_categories``.
        keywords: Passed through to ``check_keywords``.
        cl: Passed through to ``check_categories`` as its first argument.

    Returns:
        A dict mapping the value stored under the sibling ``"...url"`` key
        (the ``"desc"`` suffix of the key replaced by ``"url"``) to
        ``int(category_result) + keyword_result`` for that record.

    Raises:
        KeyError: Presumably, if a ``"...desc"`` key has no matching
            ``"...url"`` key -- depends on the object ``utils.open_cdb``
            returns; confirm.
    """
    cdb = utils.open_cdb(tfile)
    score = {}
    for key in utils.get_all_keys(cdb):
        if not key.endswith("desc"):
            continue
        # Look the record up and decode it once; the raw text feeds the
        # keyword check and its JSON-decoded form feeds the category check.
        text = str(cdb[bytes(key, 'utf-8')], 'utf-8')
        cati = check_categories(cl, cats, json.loads(text))
        keywi = check_keywords(keywords, text)
        # key[:-4] strips the trailing "desc" so the matching URL entry can
        # be addressed as "<prefix>url".
        score[cdb[bytes(key[:-4] + "url", 'utf-8')]] = int(cati) + keywi
        print("VAL:", cati, keywi)
    return score
Esempio n. 3
0
def agg_text(places):
    """Aggregate the cleaned text of every textual page stored in cdb files.

    Processing is best-effort: a file that cannot be opened or read is
    reported on stdout and skipped, and entries gathered before the failure
    are kept.

    Args:
        places: Iterable of values handed to ``utils.open_cdb``, one per file.

    Returns:
        A dict holding two entries per accepted page: ``title -> cleaned
        text`` and ``"url-" + title -> key``, where ``title`` comes from
        ``find_title``. Pages sharing a title overwrite each other. Only keys
        starting with ``"http"`` whose content is detected as a ``text/*``
        MIME type and whose cleaned text is longer than 18 characters are
        accepted.
    """
    neo_dict = {}
    for ffile in places:
        print(ffile)
        try:
            cdb = utils.open_cdb(ffile)
            try:
                # One detector per file instead of one per key.
                detector = magic.Magic(mime=True)
                for key in utils.get_all_keys(cdb):
                    if not key.startswith("http"):
                        continue
                    # Fetch the raw value once; it is reused for detection,
                    # cleaning and title extraction.
                    raw = cdb[bytes(key, 'utf-8')]
                    scan = detector.from_buffer(raw)
                    print(scan)
                    if not scan.startswith('text'):
                        continue
                    print(key)
                    html = str(raw, 'utf-8')
                    page = clean_html(html)
                    if len(page) > 18:
                        title = find_title(html)
                        neo_dict[title] = page
                        neo_dict["url-" + title] = key
            finally:
                # Release the handle even when a key fails half-way through.
                cdb.close()
        except Exception as exc:
            # Still best-effort, but no longer silent, and KeyboardInterrupt /
            # SystemExit now propagate instead of being swallowed.
            print("skipping", ffile, "-", repr(exc))
    return neo_dict
def process_event(event):
    """Randomly inject a fault into ``event`` and return the result.

    With a probability of ``config.config['fault_injection_rate_in_percent']``
    percent, one key reported by ``utils.get_all_keys(event)`` is picked at
    random and either dropped (``delete_keys_from_dict``) or has its value
    changed (``modify_value_in_dict``). Otherwise, or when the event exposes
    no keys, the event is returned unchanged.

    Args:
        event: The event to process; passed to ``utils.get_all_keys`` and to
            the two dict helpers.

    Returns:
        The event, possibly replaced by the result of one of the helpers.
    """
    logger.debug('processing event ')
    logger.debug(event)
    fault_rate = int(config.config['fault_injection_rate_in_percent'])
    # randint(1, 100) has exactly 100 equally likely outcomes, so a rate of 0
    # never fires and a rate of 100 always does. The previous randint(0, 100)
    # had 101 outcomes and still injected faults at a configured rate of 0.
    inject_fault = random.randint(1, 100) <= fault_rate

    if inject_fault:
        possible_fields_for_modification = utils.get_all_keys(event)
        if len(possible_fields_for_modification) == 0:
            # Nothing to drop or change; treat it like the no-fault case
            # instead of crashing while picking from an empty sequence.
            inject_fault = False

    if inject_fault:
        # select fault injection type
        # type: drop_key_value, change_value
        list_of_fault_injection_types = ['drop_key_value', 'change_value']
        selected_injection_type = random.choice(list_of_fault_injection_types)
        logger.debug('selected injection type: ' + selected_injection_type)
        logger.debug('possible keys for modification: ')
        logger.debug(possible_fields_for_modification)
        key_value_to_modify = random.choice(possible_fields_for_modification)

        if selected_injection_type == 'drop_key_value':
            event = delete_keys_from_dict(event, [key_value_to_modify])

        elif selected_injection_type == 'change_value':
            event = modify_value_in_dict(event, [key_value_to_modify])

        # %-style arguments keep the message identical for string keys and
        # avoid a TypeError when a key is not a string.
        logger.info('run %s on %s', selected_injection_type,
                    key_value_to_modify)

    else:
        logger.info('did not modify event')

    logger.debug('remaining event:')
    logger.debug(event)

    return event
Esempio n. 5
0
import numpy as np

from utils import get_all_keys, get_existing_keys

# Keys returned by the project helper for the 'keys_for_test' source.
# NOTE(review): presumably the test items that are actually available
# locally -- confirm against utils.get_existing_keys.
existing_keys = get_existing_keys('keys_for_test')

# Same helper, applied to the 'keys_for_train' source.
existing_train_keys = get_existing_keys('keys_for_train')

# NOTE(review): hard-coded, machine-specific absolute path; this only runs
# where that mount point exists.
all_test_keys = get_all_keys('/media/natasha/Data/Landmark Kaggle/test.csv')

# Print how many keys each source yielded.
print('all_test_keys ', len(all_test_keys))
print('existing_keys ', len(existing_keys))
# Blocks until a line is entered on stdin; presumably a manual pause so the
# counts above can be inspected before the array is loaded.
input()
# Array loaded from a .npy file in the current working directory.
# NOTE(review): judging by the file name and by get_neighbors below, row i
# presumably holds indices into existing_train_keys for test key i -- confirm.
neighbors = np.load('100_nearest_neighbors_resnet.npy')
print('neighbors', neighbors)


def get_neighbors(neighbors, existing_train_keys, existing_test_keys,
                  test_key):
    """Return the train keys listed as neighbors of ``test_key``.

    Args:
        neighbors: Indexable by test position; ``neighbors[i]`` holds the
            indices into ``existing_train_keys`` for the i-th test key.
        existing_train_keys: Array that supports indexing with an integer
            array.
        existing_test_keys: Array or sequence of test keys; each entry is
            compared against ``str(test_key)``.
        test_key: Key to look up; converted with ``str`` before comparing.

    Returns:
        ``existing_train_keys`` indexed by the neighbor indices stored for the
        first position at which ``existing_test_keys`` equals the key.

    Raises:
        IndexError: If ``test_key`` does not occur in ``existing_test_keys``.
    """
    wanted = str(test_key)
    # Compare with ``==`` on an array rather than calling ``__eq__`` directly:
    # the dunder can hand back NotImplemented (e.g. for a plain list), which
    # is not a usable mask. atleast_1d guards against a scalar result.
    mask = np.atleast_1d(np.asarray(existing_test_keys) == wanted)
    matches = np.where(mask)[0]
    if matches.size == 0:
        raise IndexError(
            'test key {!r} not found in existing_test_keys'.format(wanted))
    neighbors_indices = np.array(neighbors[matches[0]])
    return existing_train_keys[neighbors_indices]


def get_dummy_neighbors(existing_train_keys, size=100):
    """Return randomly chosen train keys, drawn with replacement.

    Args:
        existing_train_keys: Array of train keys; must expose ``shape`` and
            support integer indexing.
        size: Number of keys to draw. Defaults to 100.

    Returns:
        A list of ``size`` elements of ``existing_train_keys``.

    Raises:
        ValueError: If ``existing_train_keys`` is empty, because there is no
            valid index to draw.
    """
    # np.random.random_integers is deprecated and removed in NumPy 2.0.
    # randint's upper bound is exclusive, so shape[0] (not shape[0] - 1)
    # covers the same inclusive index range as before.
    indices = np.random.randint(low=0,
                                high=existing_train_keys.shape[0],
                                size=size)
    return [existing_train_keys[i] for i in indices]
Esempio n. 6
0
            schema = json.load(f)
        except Exception as e:
            print("Error could not load :", schema_file, "\n", e)
            exit(-1)

    for config_file in dirs:
        params_in_file = set()
        parameters_not_in_schema = set()
        # print("Opening: ", config_file)
        with open(config_file, 'r+') as f:
            try:
                config = json.load(f)
            except Exception as e:
                print("Error could not load :", config_file, "\n", e)
                continue

            utils.get_all_keys(config, params_in_file)

            for param in params_in_file:
                if not utils.find_in_dict(schema, param):
                    #print ("Could not find in schema: ", param)
                    if "." not in param:
                        parameters_not_in_schema.add(param)
            print()
            print(config_file, ":")
            #print("parameters in config file: ", params_in_file )
            print(
                "parameters not in schema: ",
                parameters_not_in_schema.difference(
                    known_params_not_in_schema))