def scrape_all_disease_contents_once(disease_db):
    common.get_logger().warning("Scraping all disease contents once more...")
    main_thread = threading.currentThread()
    disease_names = list(disease_db[common.ALL_DISEASES_VIEW].keys())
    n_diseases = len(disease_names)
    i_disease = 0
    is_still_need_to_scrape = False
    # create threads to scrape disease contents
    while (i_disease < n_diseases):
        if (threading.active_count() - 1 < common.MAX_THREADS) and \
            (i_disease < n_diseases):
            disease_name = disease_names[i_disease]
            the_disease = disease_db[common.ALL_DISEASES_VIEW][disease_name]
            if not the_disease.is_already_scraped:
                is_still_need_to_scrape = True
                t = threading.Thread(target=scrape_all_contents_of_a_disease,
                                        args=(the_disease, i_disease, n_diseases))
                t.start()
            i_disease += 1

        if threading.active_count() - 1 >= common.MAX_THREADS:
            time.sleep(common.TIMEOUT_WAIT_THREAD_FINISH)

    # wait for all the threads to finish
    while (threading.active_count() > 1):
        time.sleep(common.TIMEOUT_WAIT_THREAD_FINISH)

    return is_still_need_to_scrape
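The boolean returned above makes it easy to drive repeated passes until every disease has been scraped. A minimal driver sketch (hypothetical helper, assuming disease_db was built with common.init_disease_db and already holds the all-diseases view):

def scrape_until_done(disease_db, max_rounds=10):
    # Keep re-running full passes until a pass finds nothing left to scrape,
    # i.e. scrape_all_disease_contents_once() returns False.
    for _ in range(max_rounds):
        if not scrape_all_disease_contents_once(disease_db):
            break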
Example #2
def explode(key, patches, positions, output_dir, patchesPerImage):
    output_name = key + ".hdf5"
    ds = Dataset(output_name, output_dir, 1, patchesPerImage,
                 'DECAF', patch_dim=patches.shape[1],
                 patch_type='float32', pos_type='uint16')
    ds.append(key, patches, positions)
    get_logger().info("dataset with " + str(ds.keys.shape) + " elements, and patches " + str(patches.shape))
    ds.close()
def scrape_all_contents_of_a_disease(disease_obj, idx, n_diseases):
    common.get_logger().warning(''.join(['Scraping the ', str(idx),
        'th/', str(n_diseases), ' disease "', disease_obj.name, '"...']))
    for attr in DISEASE_ATTRS:
        setattr(disease_obj, attr[0],
                scrape_disease_attr_contents_from_url(disease_obj.url, attr[1]))

    disease_obj.is_already_scraped = True
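DISEASE_ATTRS itself is not shown in these examples; from the loop above it must be an iterable of pairs whose first element is the Disease attribute to set and whose second element is whatever scrape_disease_attr_contents_from_url expects as its section argument. A purely hypothetical shape, for illustration only:

# Hypothetical: attribute name paired with the section identifier passed to
# scrape_disease_attr_contents_from_url(); the real entries are project-specific.
DISEASE_ATTRS = [
    ('overview', 'overview'),
    ('symptoms', 'symptoms'),
    ('treatments', 'treatments'),
]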
Example #4
def do_nbnn(train_folder, test_folder):
    train = load_patches(train_folder)
    test = load_patches(test_folder)
    if options.relu:
        get_logger().info("Applying RELU")
        for class_data in train:
            class_data.patches = class_data.patches.clip(min=0)
        for class_data in test:
            class_data.patches = class_data.patches.clip(min=0)
    if options.scale:
        get_logger().info("Applying standardization")
        scaler = StandardScaler(copy=False)
        scaler.fit(np.vstack([t.patches for t in train]))
        for class_data in train:
            class_data.patches = scaler.transform(class_data.patches)
        for class_data in test:
            class_data.patches = scaler.transform(class_data.patches)
    if options.pca:
        get_logger().info("Calculating PCA")
        pca = RandomizedPCA(n_components=options.pca)
        pca.fit(np.vstack([t.patches for t in train]))
        #for class_data in train:
            #get_logger().info("Fitting class " + class_data.name)
            #pca.partial_fit(class_data.patches)
        get_logger().info("Keeping " + str(pca.explained_variance_ratio_.sum()) + " variance (" + str(options.pca) +
             ") components\nApplying PCA")
        for class_data in train:
            class_data.patches = pca.transform(class_data.patches)
        for class_data in test:
            class_data.patches = pca.transform(class_data.patches)
    nbnn(train, test, NN_Engine())
def explode(key, patches, positions, output_dir, patchesPerImage):
    output_name = key + ".hdf5"
    ds = Dataset(output_name,
                 output_dir,
                 1,
                 patchesPerImage,
                 'DECAF',
                 patch_dim=patches.shape[1],
                 patch_type='float32',
                 pos_type='uint16')
    ds.append(key, patches, positions)
    get_logger().info("dataset with " + str(ds.keys.shape) +
                      " elements, and patches " + str(patches.shape))
    ds.close()
Example #6
def load_patches(folder):
    #import pdb; pdb.set_trace()
    files = glob.glob(folder + "*.hdf5")
    num_classes = len(files)
    get_logger().info("Loading " + str(num_classes) + " classes from " + folder)
    all_features = []
    for pfile in files:
        f = hfile(pfile)
        iid = f["image_index"]
        class_patches = f["patches"][0:iid[:].max(), :]
        if options.use_position:
            class_positions = f["positions"][0:iid[:].max(), :]
            class_patches = np.hstack([class_patches, class_positions])
        all_features.append(ImageClass(class_patches, pfile, iid[:]))
    return all_features
Example #7
 def __init__(self, raw_msg=None, log=False):
     """
     Initializing from a received message entity automatically sets the to/from user for
     the passive reply; no message entity is needed when actively sending a customer
     service message.
     :param raw_msg: the XML entity of the message
     :param log: whether to log the received XML
     """
     self._receive = {}
     self._reply = {}
     self._items = []  # usually the message entities visible to the user
     if __debug__:
         self.log = get_logger(Message.__name__, "debug")
     else:
         self.log = get_logger(Message.__name__, "info")
     if raw_msg is not None:
         self.receiveMsg(raw_msg, log)
def translate_db(disease_db_en, language_code='vi'):
    translated_disease_db = dict()
    common.init_disease_db(translated_disease_db)
    all_diseases = list(disease_db_en[common.ALL_DISEASES_VIEW].values())
    for i in range(0, len(all_diseases)):
        original_disease = all_diseases[i]
        common.get_logger().info(''.join(['Translating the ', str(i),
            'th/', str(len(all_diseases)), ' disease "',
            original_disease.name, '"...']))
        translated_disease = translate_a_disease(original_disease,
                                                    language_code)
        translated_disease_db[common.ALL_DISEASES_VIEW][\
                                translated_disease.name] = translated_disease
    
    return translated_disease_db
Example #9
    def _test_grid_search(self, dataset, columns):
        dimension = [16, 32, 64]
        batchSize = [32, 64]
        learning_rate = [0.1]
        margin = [1, 0.1]
        regularizer_scale = [0.1]
        epochs = [50, 100, 500]
        count = 0
        max_fscore = 0
        max_prec_at_1 = 0

        model = dataset()
        logger = get_logger('RL.Test.GridSearch.VEER.' + str(model))

        for d, bs, lr, m, reg, e in \
            itertools.product(dimension, batchSize, learning_rate, margin, regularizer_scale, epochs):
            params = {
                'learning_rate': lr,
                'margin': m,
                'dimension': d,
                'epochs': e,
                'batchSize': bs,
                'regularizer_scale': reg
            }
            logger.info("\nTest:%d, PARAMS: %s", count, str(params))
            count = count + 1
            cur_fscore, cur_prec_at_1 = self._test_veer(
                dataset, columns, params)
            if max_fscore <= cur_fscore:
                max_fscore = cur_fscore
            if max_prec_at_1 <= cur_prec_at_1:
                max_prec_at_1 = cur_prec_at_1
            logger.info("Ran total %d Tests.", count)
            logger.info("Max Fscore: %f", max_fscore)
            logger.info("Max Mean Precision@1: %f", max_prec_at_1)
Example #10
def archive_prices():
    log = common.get_logger()
    log.info('Archiving Prices..')
    conn = common.get_connection()
    trans = conn.begin()

    try:
        #Insert initial price rows for entities that have no history yet
        conn.execute("""
                        INSERT INTO priceHistory (entity_id, price)
                        SELECT entity.id, entity.price FROM entity
                        WHERE (SELECT count(price) FROM priceHistory WHERE entity_id=entity.id)=0
                        AND price IS NOT NULL
                        """)
        #Insert new price rows for entities whose price has changed
        conn.execute("""INSERT INTO priceHistory (entity_id, price)
                       SELECT entity.id, entity.price FROM entity
                       WHERE entity.price != (SELECT price FROM priceHistory
                       WHERE entity_id=entity.id ORDER BY timestamp DESC LIMIT 1)""")
        trans.commit()
    except:
        trans.rollback()
        raise
    conn.close()
    log.info('..done.')
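If one of the statements above fails, the exception is re-raised before conn.close() runs, so the connection is never closed. A hedged refactoring sketch, assuming the same common.get_connection() API with SQLAlchemy-style begin()/execute(), that always closes it:

def run_in_transaction(statements, label):
    # Sketch only: run a list of SQL statements in one transaction and make
    # sure the connection is closed even when a statement fails.
    log = common.get_logger()
    log.info('%s..', label)
    conn = common.get_connection()
    trans = conn.begin()
    try:
        for stmt in statements:
            conn.execute(stmt)
        trans.commit()
        log.info('..done.')
    except Exception:
        trans.rollback()
        raise
    finally:
        conn.close()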
    def _test_grid_search(self, model):
        dimension= [64, 256]
        batchSize= [32, 128]
        learning_rate= [0.1, 0.5]
        margin= [1]
        regularizer_scale = [0.1]
        epochs = [1000]
        neg_rel_rate = [1, 4]
        neg_rate = [1, 7]

        logger = get_logger('RL.Test.ear.GridSearch.KR_EAR.' + str(model))
        count = 0
        max_fscore = 0
        max_prec_at_1 = 0
        for d, bs, lr, m, reg, e, nr, nrr in \
                itertools.product(dimension, batchSize, learning_rate, margin, regularizer_scale, epochs, neg_rate, neg_rel_rate):
            params = {'learning_rate': lr, 'margin': m, 'dimension': d, 'epochs': e, 'batchSize' : bs,
                            'regularizer_scale' : reg, 'neg_rate' : nr, 'neg_rel_rate' : nrr}
            logger.info("\nPARAMS: %s", str(params))
            count = count + 1
            cur_fscore, cur_prec_at_1 = self._test_kr_ear(model, params)
            if max_fscore <= cur_fscore:
                max_fscore = cur_fscore
            if max_prec_at_1 <= cur_prec_at_1:
                max_prec_at_1 = cur_prec_at_1

            logger.info("Ran total %d Tests.", count)
            logger.info("Max Fscore: %f", max_fscore)
            logger.info("Max Precision@1: %f", max_prec_at_1)
    def test_census(self):
        logger = get_logger('RL.Test.KmeansClustering.CENSUS')

        census = Census()

        compare_cl = census.get_comparision_object()
        features = compare_cl.compute(census.candidate_links,
                                      census.trainDataA, census.trainDataB)
        logger.info("Features %s", str(features.describe()))

        # Train K-Means Classifier
        logrg = recordlinkage.KMeansClassifier(algorithm='full',
                                               max_iter=1000,
                                               random_state=42)
        logrg.fit(features)

        result = logrg.predict(features)
        log_quality_results(logger, result, census.true_links,
                            len(census.candidate_links))

        #Test the classifier
        compare_cl = census.get_comparision_object()
        features = compare_cl.compute(census.test_links, census.testDataA,
                                      census.testDataB)
        logger.info("Features %s", str(features.describe()))

        result = logrg.predict(features)
        log_quality_results(logger, result, census.true_test_links,
                            len(census.test_links))
    def test_febrl(self):
        logger = get_logger('RL.Test.KmeansClustering.FEBRL')

        febrl = FEBRL()

        compare_cl = febrl.get_comparision_object()
        features = compare_cl.compute(febrl.candidate_links, febrl.trainDataA,
                                      febrl.trainDataB)
        logger.info("Features %s", str(features.describe()))

        # Train K-Means Classifier
        logrg = recordlinkage.KMeansClassifier()
        logrg.fit(features)

        result = logrg.predict(features)
        log_quality_results(logger, result, febrl.true_links,
                            len(febrl.candidate_links))

        #Test the classifier
        compare_cl = febrl.get_comparision_object()
        features = compare_cl.compute(febrl.test_links, febrl.testDataA,
                                      febrl.testDataB)
        logger.info("Features %s", str(features.describe()))

        result = logrg.predict(features)
        log_quality_results(logger, result, febrl.true_test_links,
                            len(febrl.test_links))
    def test_cora(self):
        logger = get_logger('RL.Test.KmeansClustering.CORA')

        #Read Train data in dataset A & B
        cora = Cora()

        ## Extract Features
        compare_cl = cora.get_comparision_object()
        features = compare_cl.compute(cora.candidate_links, cora.trainDataA,
                                      cora.trainDataB)
        logger.info("Features %s", str(features.describe()))

        # Train K-Means Classifier
        logrg = recordlinkage.KMeansClassifier()
        logrg.fit(features)

        result = logrg.predict(features)
        log_quality_results(logger, result, cora.true_links,
                            len(cora.candidate_links))

        #Test the classifier
        compare_cl = cora.get_comparision_object()
        features = compare_cl.compute(cora.test_links, cora.testDataA,
                                      cora.testDataB)
        logger.info("Features %s", str(features.describe()))

        result = logrg.predict(features)
        log_quality_results(logger, result, cora.true_test_links,
                            len(cora.test_links))
Example #15
    def __init__(self, request, client_address, server):
        # Read configuration parameter
        self.__isdebug = CONTEXT['debug']
        self.__size = CONTEXT['request_size']
        self.__logger = get_logger("TCPRequestHandler")

        # Compression Utility
        self.__compressedcontent = CONTEXT['compressed_content']
        self.__compression = Utility()
        self.__client = client_address

        # populate the seller list
        self.__sellerObj = server.sellerObject

        # Initialize Ad Exchange
        self.__adExObject = server.adExObject

        # Initialize the DB connection
        self.__dbConnection = server.dbConnection

        if DB_PARAMS['truncate']:
            self.__dbConnection.query("truncate table `GreyFiber`.`IPAllocation`")

        self.__infra_tested = TEST_PARAMS['infra_tested']

        if self.__infra_tested == "MININET":
            self.__mininetConnection = server.mininetConnection
            self.__floodlightConnection = server.floodlightConnection

        # Call base class
        BaseRequestHandler.__init__(self, request, client_address, server)
Example #16
    def __init__(self,
                 buyer_data=TEST_PARAMS['buyer_file_name'],
                 path_to_data=TEST_PARAMS['path'],
                 totalReqs=-1):
        '''
         Class constructor
        '''
        Thread.__init__(self)

        # Remote service bindings
        self.__serverhosts = SERVER_BINDING['address']
        self.__serverport = int(SERVER_BINDING['port'])
        # Buffer settings
        self.__bufferdim = int(CONTEXT['client_socket_buffer'])
        # Compression helper
        self.__compression = Utility()
        self.__resourcepath = TEST_PARAMS['client_path']
        self.__buyerFile = buyer_data
        self.__client_request_type = TEST_PARAMS['client_request_type']
        self.__client_request_code = TEST_PARAMS['client_request_code']
        self.__logger = get_logger("TCPClient")
        self.__conn_timeout = 1
        self.__recv_timeout = 6000

        self.totalReqs = totalReqs
    def __init__(self, act_size):
        self.name = 'stochastic'
        self.act_size = act_size
        self.logger = get_logger(self.name)

        self.local_model = None
        self.predictor = None
Example #18
    def __init__(self, request, client_address, server):
        # Read configuration parameter
        self.__isdebug = CONTEXT['debug']
        self.__size = CONTEXT['request_size']
        self.__logger = get_logger("TCPRequestHandler")

        # Compression Utility
        self.__compressedcontent = CONTEXT['compressed_content']
        self.__compression = Utility()
        self.__client = client_address

        # populate the seller list
        self.__sellerObj = server.sellerObject

        # Initialize Ad Exchange
        self.__adExObject = server.adExObject

        # Initialize the DB connection
        self.__dbConnection = DBConnection()

        if DB_PARAMS['truncate']:
            self.__dbConnection.query("truncate table `VirtualFiber`.`IPAllocation`")

        self.__infra_tested = TEST_PARAMS['infra_tested']

        # Call base class
        BaseRequestHandler.__init__(self, request, client_address, server)
Example #19
    def __init__(self, output_name, output_dir, num_files, patches, feature_type,
                 patch_dim=128, patch_type='uint8', pos_type='uint16'):
        self.log = get_logger()

        output_subdir = output_dir
        try:
            makedirs(output_subdir)
        except OSError:
            # the output directory may already exist
            pass

        output_filename = join(output_subdir, basename(output_name))
        self.log.debug('Saving extracted descriptors to %s', output_filename)

        self.mode = 'creating'
        dt = special_dtype(vlen=bytes)
        patches += 10 #for safety
        self.hfile = HDF5File(output_filename, 'w', compression='gzip', fillvalue=0.0)
        self.patches = self.hfile.create_dataset('patches', (num_files * patches, patch_dim), dtype=patch_type, chunks=True)
        self.positions = self.hfile.create_dataset('positions', (num_files * patches, 2), dtype=pos_type, chunks=True)
        self.image_index = self.hfile.create_dataset('image_index', (num_files, 2), dtype='uint64') # Start, End positions of an image
        self.keys = self.hfile.create_dataset('keys', (num_files, ), dtype=dt)
        self.key_set = set()
        self.patches.attrs['cursor'] = 0
        self.patches.attrs['feature_type'] = feature_type

        self.output_filename = output_filename
Example #20
def extract_decaf(input_dir, output_dir, network_data_dir, files, num_patches, patch_size, image_dim, levels, oversample, layer_name, decaf_oversample, extraction_method):
    log = get_logger()
    BATCH_SIZE = 16
    #ex = DecafExtractor.DecafExtractor(layer_name)
    #ex = CaffeExtractorPlus.CaffeExtractorPlus(
                       #network_data_dir + 'hybridCNN_iter_700000_upgraded.caffemodel',
                       #network_data_dir + 'hybridCNN_deploy_no_relu_upgraded.prototxt',
                       #network_data_dir + 'hybrid_mean.npy')
    ex = NewCaffeExtractor.NewCaffeExtractor()
    #import pdb; pdb.set_trace()
    ex.set_parameters(patch_size, num_patches, levels, image_dim, BATCH_SIZE)
    if oversample:
        log.info('Extracting with mirror combinations (X,Y,X-Y,Y-X)')
        ex.enable_data_augmentation()


    ds = Dataset(input_dir, output_dir, len(files),
                 num_patches * ex.get_number_of_features_per_image(),
                 'CAFFE', patch_dim=ex.get_descriptor_size(),
                 patch_type='float32', pos_type='uint16')

    for f in files:
        if f in ds:
            log.info('Skipping <%s>. Already in the dataset.', basename(f))
            continue

        try:
            features = ex.extract_image(f)
        except Exception:
            features = None
        if features is not None and features.cursor > 0:
            (patches6, patches7, positions) = features.get()

            ds.append(f, patches6, patches7, positions)
Example #21
def get_arguments():
    log = get_logger()

    parser = ArgumentParser(description='SVM based classification for whole images.')
    parser.add_argument("--input-dir", dest="input_dir",
                        help="Directory containing HDF5 files.")
    parser.add_argument("--num-train-images", dest="num_train_images", type=int,
                        help="Number of images to use from training set.")
    parser.add_argument("--num-test-images", dest="num_test_images", type=int,
                        help="Number of images to use from the test set.")
    parser.add_argument("--patch_name", dest="patch_name",
                        help="The name of the patches in the HDF5 File.")
    parser.add_argument("--patches-per-image", dest="patches_per_image", type=int,
                        help="Number of patches for each image.")
    parser.add_argument("--cmd", dest="cmd",
                        choices=['whole-image-svm', 'svm-nbnl'],
                        help="Command to execute.")
    args = parser.parse_args()
    patchOptions.patch_name = args.patch_name
    if not args.input_dir:
        log.error('input dir is required, but not present.')
        exit()
    if not args.cmd:
        log.error('cmd is required, but not present.')
        exit()
    if not args.num_train_images:
        log.error('num_train_images is required, but not present.')
        exit()
    if not args.num_test_images:
        log.error('num_test_images is required, but not present.')
        exit()
    return args
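argparse can enforce mandatory options itself, which removes the need for the manual checks above. A minimal sketch of the same parser expressed declaratively (hypothetical, not the project's actual code):

parser = ArgumentParser(description='SVM based classification for whole images.')
parser.add_argument("--input-dir", dest="input_dir", required=True,
                    help="Directory containing HDF5 files.")
parser.add_argument("--num-train-images", dest="num_train_images", type=int,
                    required=True, help="Number of images to use from training set.")
parser.add_argument("--num-test-images", dest="num_test_images", type=int,
                    required=True, help="Number of images to use from the test set.")
parser.add_argument("--cmd", dest="cmd", required=True,
                    choices=['whole-image-svm', 'svm-nbnl'],
                    help="Command to execute.")
args = parser.parse_args()  # argparse exits with a usage message if anything required is missing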
Example #22
def archive_values():
    log = common.get_logger()
    log.info('Archiving values..')
    conn = common.get_connection()
    trans = conn.begin()
    try:
        #Insert new users
        conn.execute("""
                        INSERT INTO ValueHistory (user_id, value)
                        (select id, value from User where
                        (select count(value) from ValueHistory where user_id=User.id)=0
                        and value is not NULL)
                        """)

        #Update existing users
        conn.execute("""INSERT INTO ValueHistory (user_id, value)
                       SELECT User.id, User.value FROM User
                       WHERE User.value != (SELECT value FROM ValueHistory
                       WHERE user_id=User.id ORDER BY timestamp DESC LIMIT 1)""")
        trans.commit()
    except:
        trans.rollback()
        raise
    conn.close()
    log.info('..done.')
Example #23
def load_split_whole_image_only(input_folder, nTrain, nTest):
    logger = get_logger()
    files = sorted(glob( join(input_folder, '*.hdf5') ), key=basename)
    nClasses = len(files)
    logger.info("Loading " + str(nClasses) + " classes")
    train_patches = np.empty([nClasses*nTrain, patchOptions.size]) # nClasses*nSamples x nFeatures
    test_patches = np.empty([nClasses*nTest, patchOptions.size])
    train_labels = np.empty([nClasses*nTrain])
    test_labels = np.empty([nClasses*nTest])
    start = time.clock()
    train_patch_count = test_patch_count = 0
    for (classNumber,filename) in enumerate(files):
        hfile = HDF5File(filename, 'r')
        iid = hfile["image_index"][:]
        nImages = iid.shape[0]
        assert nImages >= (nTrain + nTest), "Not enough images!"
        np.random.shuffle(iid)
        trainIdx = iid[0:nTrain]
        testIdx  = iid[nTrain:nTrain+nTest]
        patches = hfile[patchOptions.patch_name]
        for iid in trainIdx:
            train_patches[train_patch_count]=patches[iid[0]]
            train_patch_count += 1
        train_labels[classNumber*nTrain:(classNumber+1)*nTrain]=classNumber*np.ones(nTrain)
        for iid in testIdx:
            test_patches[test_patch_count]=patches[iid[0]]
            test_patch_count += 1
        test_labels[classNumber*nTest:(classNumber+1)*nTest]=classNumber*np.ones(nTest)
        logger.info("Patch count: " + str(train_patch_count) + " training and " + str(test_patch_count) + " test patches for class " + filename)
        hfile.close()
    end = time.clock()
    logger.info("It took " + str(end - start) + " seconds")
    LoadedData = namedtuple("LoadedData","train_patches train_labels test_patches test_labels")
    return LoadedData(train_patches, train_labels, test_patches, test_labels)
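The namedtuple returned above plugs straight into an sklearn-style workflow. A minimal usage sketch, assuming patchOptions has been configured (as in the argument parsing shown earlier) and 'descriptors/' is a hypothetical folder holding one HDF5 file per class:

from sklearn import svm

# Hypothetical call: 30 training and 10 test images per class.
data = load_split_whole_image_only('descriptors/', nTrain=30, nTest=10)
clf = svm.LinearSVC(dual=False)
clf.fit(data.train_patches, data.train_labels)
print(clf.score(data.test_patches, data.test_labels))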
    def _test_grid_search(self, dataset):
        dimension = [50, 80, 120]
        batchSize = [100]
        learning_rate = [0.1, 0.2]
        margin = [0.5, 1]
        regularizer_scale = [0.1, 0.2]
        epochs = [100, 500]
        neg_rel_rate = [1, 2, 5]
        neg_rate = [1, 5, 10]
        count = 0
        max_fscore = 0

        model = dataset()
        logger = get_logger('RL.Test.GridSearch.TransE.' + str(model))

        for d, bs, lr, m, reg, e, nr, nrr in \
            itertools.product(dimension, batchSize, learning_rate, margin, regularizer_scale, epochs, neg_rate, neg_rel_rate):
            params = {
                'learning_rate': lr,
                'margin': m,
                'dimension': d,
                'epochs': e,
                'batchSize': bs,
                'regularizer_scale': reg,
                'neg_rate': nr,
                'neg_rel_rate': nrr
            }
            logger.info("\nPARAMS: %s", str(params))
            count = count + 1
            cur_fscore = self._test_transe(dataset, params)
            if max_fscore <= cur_fscore:
                max_fscore = cur_fscore

        logger.info("Ran total %d Tests.", count)
        logger.info("Max Fscore: %f", max_fscore)
Example #25
def select_random_support(train_dir, support_dir, num_train_images,
                          support_size, position_influence):
    log = get_logger()

    train_files = [
        f for f in glob(join(train_dir, '*'))
        if splitext(f.lower())[1] == '.hdf5'
    ]

    try:
        os.makedirs(support_dir)
    except OSError:
        # the support directory may already exist
        pass

    for target_file in train_files:
        log.info('Extracting random support from "%s"...',
                 basename(target_file))
        #(patches, _)= get_standardized_patches(target_file, num_train_images, position_influence)
        (patches, _) = get_patches(target_file, num_train_images,
                                   position_influence)
        rand_ix = random.sample(range(patches.shape[0]),
                                min(patches.shape[0], support_size))
        patches = patches[np.array(rand_ix), :]

        fh = HDF5File(join(support_dir, basename(target_file)), 'w')
        ds = fh.create_dataset('support', patches.shape, dtype='float')
        ds[:] = patches
        ds.attrs['cursor'] = patches.shape[0]

        fh.close()
Example #26
def classify_with_support(engine,
                          test_dir,
                          support_dir,
                          num_train_images,
                          num_test_images,
                          position_influence,
                          support_size=0):
    log = get_logger()

    test_files = sorted(glob(join(test_dir, '*.hdf5')), key=basename)
    num_classes = len(test_files)

    log.info('Testing w.r.t. %d classes.' % num_classes)
    if position_influence > 0:
        log.info('Position influence (alpha) is %.2f.', position_influence)

    # Allocating distances for each test class
    dists = np.ndarray((num_classes, num_classes, num_test_images))

    # Identifying labels
    labels = np.vstack([
        c * np.ones((1, num_test_images), dtype=np.int)
        for c in range(num_classes)
    ])

    log.info('Looking for nearest neighbors...')
    for (support_class, f) in enumerate(test_files):
        support_filename = join(support_dir, basename(f))

        if is_selected_support(support_filename):
            support = get_support(support_filename, support_size)
        else:
            support, _ = get_patches(support_filename, num_train_images,
                                     position_influence)

        # Creating index for current class
        log.info('\tBuilding index from support of class "%s"...', basename(f))
        engine.fit(support)
        del support

        # Evaluating test samples for all classes using current index
        for (test_class, test_filename) in enumerate(test_files):
            (test_patches,
             test_image_index) = get_patches(test_filename, num_test_images,
                                             position_influence)

            log.info('\tLooking for NNs of "%s"...', basename(test_filename))
            im_to_class_dists = engine.dist(test_patches)

            if len(im_to_class_dists.shape) > 1:  # In case of k-NN, we average
                im_to_class_dists = im_to_class_dists.mean(axis=1)

            dists[support_class, test_class, :] = \
                np.array([sum(im_to_class_dists[ix[0]:ix[1]]) for ix in test_image_index])

    predictions = dists.argmin(axis=0)
    acc = (labels == predictions).mean()
    log.info('*** Recognition accuracy is: %.2f%%', acc * 100)

    return acc
Example #27
def fetch_email(config_file, run_forever):
    """
    Start Backup Service
    """
    # Load config
    config = common.load_config_file(config_file)

    # Load logging config
    common.setup_logging_config("%s/../config/" % __abs_dirpath__)

    # Get logger
    logger = common.get_logger("app")

    logger.info("Starting mail parsing...")
    credentials_dir = os.path.join(__abs_dirpath__, '../.credentials')

    # Enhance configuration
    config['credentials_dir'] = credentials_dir

    # Instantiate services
    mail_reader = GmailReader(config=config, logger=logger)
    job_queuer = RedisJobQueuer(config=config, logger=logger)

    hackpad_processor = HackpadMailProcessor(config=config,
                                             mail_reader=mail_reader,
                                             job_queuer=job_queuer,
                                             logger=logger)

    if run_forever:
        hackpad_processor.run_forever()
    else:
        hackpad_processor.fetch_and_process_emails()
Example #28
def get_arguments():
    log = get_logger()

    parser = ArgumentParser(description='HD5 Splitter.')
    parser.add_argument("--input-dir",
                        dest="input_dir",
                        help="Directory with HDF5 images.")
    parser.add_argument("--output-dir",
                        dest="output_dir",
                        help="Directory to put HDF5 files to.")
    parser.add_argument("--patches",
                        dest="patches",
                        type=int,
                        default=100,
                        help="Number of patches to extract per image.")

    args = parser.parse_args()

    if not args.input_dir:
        log.error('input-dir option is required, but not present.')
        exit()

    if not args.output_dir:
        log.error('output-dir option is required, but not present.')
        exit()

    return args
Example #29
def do_nbnl(args):
    logger = get_logger()
    logger.info("Getting indexes")
    data = get_indexes(args.input_dir, args.num_train_images, args.num_test_images, args.patches_per_image)
    train = data.Train
    num_classes = len(train)
    logger.info("Loading training patches")
    X = np.vstack([t.get_patches() for t in train])
    for t in train: t.unload()
    Y = np.vstack([c*np.ones((train[c].get_num_patches(),1), dtype=np.int) for c in range(num_classes)])
    clf = svm.LinearSVC(dual=False)
    logger.info("Training Linear SVM at patch level")
    logger.info(str(X.shape) + " X, " + str(Y.shape) + " Y")
    clf.fit(X,Y.ravel())
    logger.info("Training completed, freeing training patches")
    del X, Y
    test = data.Test
    testX = np.vstack([t.get_patches() for t in test])
    for t in test: t.unload()
    testY = np.vstack([c*np.ones((test[c].get_num_patches(),1), dtype=np.int) for c in range(num_classes)])
    logger.info(str(testX.shape) + " testX, " + str(testY.shape) + " testY")
    logger.info("Evaluating test patches...")
    confidence = clf.decision_function(testX)
    predicted = np.argmax(confidence,1)
    correct=(predicted==testY.ravel()).sum()
    score = clf.score(testX, testY)
    logger.info("Accuracy " + str(score) + " at patch level " + str((100.0*correct)/len(predicted)))
    test_indexes = np.empty([num_classes, args.num_test_images, 2])
    for c in range(num_classes):
        test_indexes[c]=test[c].get_new_indexes()
    image_labels = np.vstack([c*np.ones((args.num_test_images,1)) for c in range(num_classes)])
    nbnl(confidence, test_indexes, image_labels)
Example #30
 def __init__(self):
     # Constants describing the training process.
     self.moving_average_decay = 0.9999  # The decay to use for the moving average.
     self.num_steps_per_decay = 1000  # Epochs after which learning rate decays.
     self.learning_rate_decay_factor = 0.95  # Learning rate decay factor.
     self.intial_learning_rate = args.learning_rate
     self.batch_size = args.batch_size
     self.eval_batch_size = args.batch_size
     self.num_examples_per_epoch_for_val = 100
     self.val_iter = self.num_examples_per_epoch_for_val / self.batch_size
     self.image_h = args.image_h
     self.image_w = args.image_w
     self.image_c = args.image_c
     self.num_classes = args.num_classes  # cup, disc, other
     self.max_steps = args.total_epoches
     self.batch_size = args.batch_size
     self.image_path = args.train_path
     self.test_path = args.test_path
     self.finetune_ckpt = args.finetune
     self.test_ckpt = args.test
     self.loss_func = args.loss
     self.save_image = args.save_image
     self.log_dir = os.path.join('logs', args.note)
     self.output = get_logger('segnet', folder=self.log_dir)
     self.dataset = Dataset(args)
     self.sess_config = tf.ConfigProto(allow_soft_placement=True,
                                       log_device_placement=False)
     self.sess_config.gpu_options.allow_growth = True
    def _test_transh(self, dataset, params):
        graph = Graph_ER(dataset)
        model = dataset()
        logger = get_logger('RL.Test.er.TransH.' + str(model))

        transh = TransH(graph,
                        dimension=params['dimension'],
                        learning_rate=params['learning_rate'],
                        margin=params['margin'],
                        regularizer_scale=params['regularizer_scale'],
                        batchSize=params['batchSize'],
                        neg_rate=params['neg_rate'],
                        neg_rel_rate=params['neg_rel_rate'])
        loss = transh.train(max_epochs=params['epochs'])
        logger.info("Training Complete with loss: %f", loss)

        ent_embeddings = transh.get_ent_embeddings()
        result_prob = []
        for i in range(0, len(graph.entity_pairs)):
            distance = abs(
                spatial.distance.cosine(
                    ent_embeddings[graph.entity_pairs[i][0]],
                    ent_embeddings[graph.entity_pairs[i][1]]))
            result_prob.append(
                (graph.entity_pairs[i][0], graph.entity_pairs[i][1], distance))
            #logger.info("i: %d, distance: %f true_pairs: %s", i, distance, graph.entity_pairs[i] in true_pairs)

        #Write Embeddings to file
        export_embeddings('er', str(model), 'TransH', graph.entity,
                          ent_embeddings)
        export_result_prob(dataset, 'er', str(model), 'TransH', graph.entity,
                           result_prob, graph.true_pairs)
        optimal_threshold, max_fscore = get_optimal_threshold(
            result_prob, graph.true_pairs)

        try:
            logger.info("MAX FSCORE: %f AT : %f", max_fscore,
                        optimal_threshold)
            result = pd.MultiIndex.from_tuples([(e1, e2)
                                                for (e1, e2, d) in result_prob
                                                if d <= optimal_threshold])
            params['threshold'] = optimal_threshold
            log_quality_results(logger, result, graph.true_pairs,
                                len(graph.entity_pairs), params)
            export_false_negatives(dataset, 'er', str(model), 'TransH',
                                   graph.entity, result_prob, graph.true_pairs,
                                   result, graph.entity)
            export_false_positives(dataset, 'er', str(model), 'TransH',
                                   graph.entity, result_prob, graph.true_pairs,
                                   result, graph.entity)
        except Exception:
            logger.info("Zero Results")

        #Log MAP, MRR and Hits@K
        ir_metrics = InformationRetrievalMetrics(result_prob, graph.true_pairs)
        p_at_1 = ir_metrics.log_metrics(logger, params)

        transh.close_tf_session()
        return (max_fscore, p_at_1)
Example #32
def get_arguments():
    log = get_logger()

    parser = ArgumentParser(description='Patch/descriptor extraction utility.')
    parser.add_argument("--patches", dest="patches", type=int, default=1000,
                        help="Number of patches to extract per image.")
    parser.add_argument("--patch-size", dest="patch_size", type=int, default=16,
                        help="Size of the patch.")
    parser.add_argument("--image-dim", dest="image_dim", type=int,
                        help="Size of the largest image dimension.")
    parser.add_argument("--levels", dest="levels", type=int, default=3,
                        help="Number of hierarchical levels to extract patches from. Procedure starts from <patch-size> and divides it by 2 at each level.")
    parser.add_argument("--descriptor", dest="descriptor", default='DECAF',
                        choices=['DECAF'],
                        help="Type of feature descriptor.")
    parser.add_argument("--input-dir", dest="input_dir",
                        help="Directory with JPEG images.")
    parser.add_argument("--output-dir", dest="output_dir",
                        help="Directory to put HDF5 files to.")
    parser.add_argument("--num-train-images", dest="num_train_images", type=int,
                        help="Number of train images.")
    parser.add_argument("--num-test-images", dest="num_test_images", type=int,
                        help="Number of test images.")
    parser.add_argument("--split", dest="split", type=int,
                        help="Split to extract.")
    parser.add_argument("--oversample", dest="oversample", action='store_true',
                        help="Add patch flipping.")
    parser.add_argument("--decaf-oversample", dest="decaf_oversample", action='store_true',
                        help="Caffe oversampling. Flip X, Y, etc.")
    parser.add_argument("--layer-name", dest="layer_name",
                        help="Decaf layer name.")
    parser.add_argument("--network-data-dir", dest="network_data_dir",
                        help="Directory holding the network weights.")
    parser.add_argument("--patch-method", dest="patch_method",
                        help="What method to use to extract patches.")

    args = parser.parse_args()

    if not args.input_dir:
        log.error('input-dir option is required, but not present.')
        exit()

    if not args.output_dir:
        log.error('output-dir option is required, but not present.')
        exit()

    if not args.image_dim:
        log.error('image-dim option is required, but not present.')
        exit()

    if not args.num_train_images:
        log.error('num_train_images option is required, but not present.')
        exit()

    if not args.num_test_images:
        log.error('num_test_images option is required, but not present.')
        exit()

    return args
    def balanced_extract(self, im, feature_storage, check_patch_coords,
                         transform, filename):
        (w, h) = im.size
        log = get_logger()
        # Extracting features from patches
        preprocessedPatches = np.empty([self.patches_per_image, 3, 227, 227],
                                       dtype="float32")
        positions = np.zeros((self.patches_per_image, 2), dtype="uint16")
        # Calculating patch step
        if self.levels > 0:
            patchesXLevel = self.patches_per_image / len(self.patch_sizes)
            log.info("Patches per level: " + str(patchesXLevel))
        k = 0
        if isinstance(
                transform,
                NopTransform):  # Hacky.... #TODO why only for NopTransform?
            # Extracting features for the whole image
            preprocessedPatches[k, ...] = self.transformer.preprocess(
                'data', self.to_rgb(im))
            positions[k, ...] = np.matrix([0, 0])
            k += 1
        expected = 0
        skipped = 0

        for l in range(self.levels):
            countLevel = 0
            _w = w - self.patch_sizes[l]
            _h = h - self.patch_sizes[l]
            if (_w < 0 or _h < 0):
                continue
            patch_step = int((_w * _h / patchesXLevel)**0.52) + 2
            w_steps = np.arange(0, _w + 1, patch_step)
            h_steps = np.arange(0, _h + 1, patch_step)
            print "Image size (" + str(w)+", "+str(h)+") - patch size: " + str(self.patch_sizes[l]) + " patch step: " + str(patch_step) + " available pixels: (" + str(_w) +", "+str(_h)+") " \
                    "\n\twsteps: " + str(w_steps) + " \n\th_steps: " + str(h_steps)
            for i in range(len(w_steps)):
                for j in range(len(h_steps)):
                    expected += 1
                    x = w_steps[i]
                    y = h_steps[j]
                    patch_left = x + self.patch_sizes[l]
                    patch_bottom = y + self.patch_sizes[l]

                    if (check_patch_coords(x, y, patch_left, patch_bottom)
                            and patch_left <= w and patch_bottom <= h):
                        patch = im.crop((x, y, patch_left, patch_bottom))
                        patch.load()
                        countLevel += 1
                        preprocessedPatches[k,
                                            ...] = self.transformer.preprocess(
                                                'data', self.to_rgb(patch))
                        positions[k, ...] = np.matrix([x, y])
                        k += 1
                    else:
                        skipped += 1
            print "got " + str(countLevel) + " for level " + str(l)
        self.load_caffe_patches(preprocessedPatches[0:k], positions[0:k],
                                feature_storage)
        print "Expected " + str(expected) + " skipped: " + str(skipped)
Example #34
 def __init__(self):
     '''
     Add connection object here if exchange is physically located in a separate server
     '''
     self.__logger = get_logger("AdExchange")
     self.__logger.info("### Starting Ad Exchange...")
     self.__auction = ADEX['auction']
     self.__reserve = ADEX['reserve']
def extract(input_dir, output_dir, network_data_dir, num_patches, patch_size, image_dim, levels, layer_name):
    log = get_logger()
    BATCH_SIZE = 1
    log.info("Walking " + input_dir)
    ex = NewCaffeExtractor.NewCaffeExtractor()
    ex.set_parameters(patch_size, num_patches, levels, image_dim, BATCH_SIZE)
    params = namedtuple("Params","input_dir output_dir extractor")
    os.path.walk(input_dir, walk, params(input_dir, output_dir, ex))
    def __init__(self, act_size, n_servers):
        self.name = 'greedy'
        self.n_servers = n_servers
        self.act_size = act_size
        self.logger = get_logger(self.name)

        self.local_model = None
        self.predictor = None
Example #37
    def __init__(self, act_size):
        self.name = 'round_robin'
        self.act_size = act_size
        self.action = 0
        self.logger = get_logger(self.name)

        self.local_model = None
        self.predictor = None
Example #38
def get_arguments():
    log = get_logger()

    parser = ArgumentParser(description='NN support selection and classification tool.')
    parser.add_argument("--train-dir", dest="train_dir",
                        help="Directory containing training HDF5 files.")
    parser.add_argument("--test-dir", dest="test_dir",
                        help="Directory containing testing HDF5 files.")
    parser.add_argument("--support", dest="support",
                        help="Directory or file to store/get NN support.")
    parser.add_argument("--result-dir", dest="result_dir",
                        help="Directory to store NN distances.")
    parser.add_argument("--support-size", dest="support_size", type=int,
                        help="Support size to select from each class.")
    parser.add_argument("--num-train-images", dest="num_train_images", type=int,
                        help="Number of images to use from training set.")
    parser.add_argument("--num-test-images", dest="num_test_images", type=int,
                        help="Number of images to use from the test set.")
    parser.add_argument("--gamma", dest="gamma", type=float,
                        help="KDE bandwidth.")
    parser.add_argument("--knn", dest="knn", type=int, default=1,
                        help="Number of nearest neighbors to look for.")
    parser.add_argument("--alpha", dest="alpha", type=float, default=0,
                        help="Patch position influence.")
    parser.add_argument("--cmd", dest="cmd",
                        choices=['select-random', 'classify'],
                        help="Command to execute.")
    parser.add_argument("--alg-type", dest="alg_type", default='nn',
                        choices=['nn', 'kde'],
                        help="Nearest neighbor algorithm type.")
    parser.add_argument("--on_the_fly_splits", dest="on_the_fly_splits",
                        action='store_true',
                        help="Splits are computed on the fly.")
    parser.add_argument("--overwrite", dest="overwrite",
                        action='store_true',
                        help="Overwrite result of command (if any).")
    parser.add_argument("--patch_name", dest="patch_name",
                        help="The name of the patches in the HDF5 File.")

    args = parser.parse_args()

    if not args.cmd:
        log.error('cmd option is required, but not present.')
        exit()

    if args.cmd == 'train' and not args.train_dir:
        log.error('train-dir option is required, but not present.')
        exit()

    if not args.support:
        log.error('support option is required, but not present.')
        exit()

    if args.cmd == 'nn' and not args.result_dir:
        log.error('result-dir option is required, but not present.')
        exit()

    return args
    def balanced_extract(self, im, feature_storage, check_patch_coords, transform, filename):
        (w, h) = im.size
        log = get_logger()
        # Extracting features from patches
        preprocessedPatches = np.empty(
            [self.patches_per_image, 3, 227, 227], dtype="float32")
        positions = np.zeros((self.patches_per_image, 2), dtype="uint16")
        # Calculating patch step
        if self.levels > 0:
            patchesXLevel = self.patches_per_image / len(self.patch_sizes)
            log.info("Patches per level: " + str(patchesXLevel))
        k = 0
        if isinstance(transform, NopTransform):  # Hacky.... #TODO why only for NopTransform?
            # Extracting features for the whole image
            preprocessedPatches[k, ...] = self.transformer.preprocess(
                'data', self.to_rgb(im))
            positions[k, ...] = np.matrix([0, 0])
            k += 1
        expected = 0
        skipped = 0
        largestSide = max(im.size)
        smallestPatch = int(round(largestSide * float(32) / 200))
        self.patch_sizes = [smallestPatch,
                            smallestPatch * 2, smallestPatch * 4]
        for l in range(self.levels):
            countLevel = 0
            _w = w - self.patch_sizes[l]
            _h = h - self.patch_sizes[l]
            if(_w < 0 or _h < 0):
                continue
            patch_step = int((_w * _h / patchesXLevel) ** 0.52) + 2
            w_steps = np.arange(0, _w + 1, patch_step)
            h_steps = np.arange(0, _h + 1, patch_step)
            print "Image size (" + str(w) + ", " + str(h) + ") - patch size: " + str(self.patch_sizes[l]) + " patch step: " + str(patch_step) + " available pixels: (" + str(_w) + ", " + str(_h) + ") " \
                "\n\twsteps: " + str(w_steps) + " \n\th_steps: " + str(h_steps)
            for i in range(len(w_steps)):
                for j in range(len(h_steps)):
                    expected += 1
                    x = w_steps[i]
                    y = h_steps[j]
                    patch_left = x + self.patch_sizes[l]
                    patch_bottom = y + self.patch_sizes[l]

                    if (check_patch_coords(x, y, patch_left, patch_bottom) and
                            patch_left <= w and patch_bottom <= h and k < self.patches_per_image):
                        patch = im.crop((x, y, patch_left, patch_bottom))
                        patch.load()
                        countLevel += 1
                        preprocessedPatches[k, ...] = self.transformer.preprocess(
                            'data', self.to_rgb(patch))
                        positions[k, ...] = np.matrix([x, y])
                        k += 1
                    else:
                        skipped += 1
            print "got " + str(countLevel) + " for level " + str(l)
        self.load_caffe_patches(preprocessedPatches[0:k], positions[
                                0:k], feature_storage)
        print "Expected " + str(expected) + " skipped: " + str(skipped)
Example #40
    def _test_rl_transe(self, dataset, params):
        #Load Graph Data
        graph = Graph_ER(dataset)
        model = dataset()
        logger = get_logger('RL.Test.er.RLTransE.' + str(model))

        transe = TransE(graph,
                        dimension=params['dimension'],
                        learning_rate=params['learning_rate'],
                        margin=params['margin'],
                        regularizer_scale=params['regularizer_scale'],
                        batchSize=params['batchSize'],
                        neg_rate=params['neg_rate'],
                        neg_rel_rate=params['neg_rel_rate'])
        loss = transe.train(max_epochs=params['epochs'])
        logger.info("Training Complete with loss: %f", loss)

        ent_embeddings = transe.get_ent_embeddings()

        result_prob = []
        for (a, b) in graph.entity_pairs:
            a_triples = [(h, t, r) for (h, t, r) in graph.triples if h == a]
            b_triples = [(h, t, r) for (h, t, r) in graph.triples if h == b]

            distance = abs(
                spatial.distance.cosine(ent_embeddings[a], ent_embeddings[b]))
            for (ah, at, ar) in a_triples:
                bt = [t for (h, t, r) in b_triples if r == ar]
                if len(bt):
                    distance = distance + abs(spatial.distance.cosine(\
                                            ent_embeddings[at], ent_embeddings[bt[0]]))
            result_prob.append((a, b, distance))
            #logger.info("a: %d, b: %d distance: %f true_pairs: %s", a, b, distance, (a, b) in graph.true_pairs)

        #Write Embeddings to file
        export_embeddings('er', str(model), 'RLTransE', graph.entity,
                          ent_embeddings)
        export_result_prob(dataset, 'er', str(model), 'RLTransE', graph.entity,
                           result_prob, graph.true_pairs)
        optimal_threshold, max_fscore = get_optimal_threshold(
            result_prob, graph.true_pairs, max_threshold=3.0)

        try:
            params['threshold'] = optimal_threshold
            result = pd.MultiIndex.from_tuples([(e1, e2)
                                                for (e1, e2, d) in result_prob
                                                if d <= optimal_threshold])
            log_quality_results(logger, result, graph.true_pairs,
                                len(graph.entity_pairs), params)
        except Exception:
            logger.info("Zero Results")

        #Log MAP, MRR and Hits@K
        ir_metrics = InformationRetrievalMetrics(result_prob, graph.true_pairs)
        precison_at_1 = ir_metrics.log_metrics(logger, params)

        transe.close_tf_session()
        return (max_fscore, precison_at_1)
def generate_splitFiles(available_classes, dir_name, files):
    get_logger().info("Folder " + dir_name + " contains " + str(len(files)) + " files")
    folder_name = os.path.basename(os.path.normpath(dir_name))
    if not hasNumbers(folder_name): #we are only interested in instance level folders (they contain numbers)
        return
    basename = nregex.match(folder_name).group(1)
    for f in files:
        #get_logger().info("Parsing " + join(dir_name,f))
        print("Parsing " + join(dir_name,f))
        old_file = join(dir_name,f)
        if os.path.isdir(old_file):
            continue
        elif not is_image(old_file):
            get_logger().info("Skipping " + old_file + ": not an image")
            continue
        if basename in available_classes:
            class_n = available_classes.index(basename)
            print(old_file + " " + str(class_n))
def create_hdf5_dataset(output_filename, patches, positions):
    log = get_logger()
    log.debug('Saving extracted descriptors to %s', output_filename)
    hfile = HDF5File(output_filename, 'w', compression='gzip', fillvalue=0.0)
    hpatches = hfile.create_dataset('patches', patches.shape, dtype="float32", chunks=True)
    hpositions = hfile.create_dataset('positions', positions.shape, dtype="uint16", chunks=True)
    hpatches[:]=patches
    hpositions[:]=positions
    hfile.close()
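A minimal usage sketch for the helper above (hypothetical file name and sizes; assumes create_hdf5_dataset and its HDF5File dependency are importable from the surrounding module):

import numpy as np

# Toy data: 500 descriptors of dimension 128 with (x, y) patch positions.
patches = np.random.rand(500, 128).astype('float32')
positions = np.random.randint(0, 256, size=(500, 2)).astype('uint16')
create_hdf5_dataset('example_patches.hdf5', patches, positions)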
Example #43
 def load(self):
     get_logger().info("Loading patches for " + self.file_name)
     hfile = HDF5File(self.file_name, 'r')
     patches = hfile[self.patch_name]
     feature_dim = patches.shape[1]
     indexes = self.indexes
     num_patches=(indexes[:,1]-indexes[:,0]).sum()
     self.patches = np.empty([num_patches, feature_dim])
     self.new_index = np.empty([indexes.shape[0],2])
     patch_start = n_image = 0
     for iid in indexes:
         n_patches = iid[1]-iid[0]
         self.patches[patch_start:patch_start+n_patches,:] = patches[iid[0]:iid[1],:]
         self.new_index[n_image] = [patch_start, patch_start+n_patches]
         patch_start += n_patches
         n_image += 1
     hfile.close()
     get_logger().info("Loaded " + str(num_patches) + " patches")
Example #44
def nbnn(train, test, engine):
    num_classes = len(train)
    num_test_images = len(test[0].iid)
    dists = np.ndarray((num_classes, num_classes, num_test_images))
    # Identifying labels
    labels = np.vstack([c * np.ones((1, num_test_images), dtype=np.int) for c in range(num_classes)])
    for (support_class, class_data) in enumerate(train):
        get_logger().info("Loading class " + class_data.name + " as support - " + str(class_data.patches.shape))
        engine.fit(class_data.patches)
        for test_class, test_data in enumerate(test):
            #get_logger().info("Testing class " + test_data.name)
            im_to_class_dists = engine.dist(test_data.patches)
            #import pdb; pdb.set_trace()
            dists[support_class, test_class, :] = \
                np.array([sum(im_to_class_dists[ix[0]:ix[1]]) for ix in test_data.iid])
    predictions = dists.argmin(axis=0)
    acc = (labels == predictions).mean()
    get_logger().info('*** Recognition accuracy by ' + engine.name + ' is: ' + str(acc * 100))
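nbnn only relies on the engine exposing name, fit(support_patches) and dist(query_patches); the NN_Engine passed in from do_nbnn is not shown in these examples. A minimal brute-force stand-in with the same interface, as a sketch (assumption: dist should return each query patch's distance to its nearest support patch):

from sklearn.neighbors import NearestNeighbors

class BruteForceNNEngine(object):
    """Sketch of the fit/dist interface that nbnn() expects."""
    name = 'brute-force-1nn'

    def fit(self, support_patches):
        self._nn = NearestNeighbors(n_neighbors=1, algorithm='brute')
        self._nn.fit(support_patches)

    def dist(self, query_patches):
        distances, _ = self._nn.kneighbors(query_patches)
        return distances[:, 0]  # one distance per query patch

With this in place, nbnn(train, test, BruteForceNNEngine()) mirrors the call made in do_nbnn.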
Example #45
 def __init__(self):
     '''
     Initialize seller class
     '''
     self.__rsp = TEST_PARAMS['path']
     self.__sf = TEST_PARAMS['seller_file_name']
     self.__compressed = CONTEXT['compressed_content']
     self.__sellerGraph = nx.Graph()
     self.__logger = get_logger("Seller")
Example #46
def get_arguments():
    log = get_logger()

    parser = ArgumentParser(description='Descriptor checker utility.')
    parser.add_argument("--input-dir", dest="input_dir",
                        help="Directory with HDF5 files.")

    args = parser.parse_args()
    return args
Example #47
    def _test_etranse(self, dataset, params):
        model = dataset()
        graph = Graph_ERER(dataset)
        logger = get_logger("RL.Test.erer.ETransE." + str(model))

        etranse = ETransE(graph,
                          dimension=params['dimension'],
                          batchSize=params['batchSize'],
                          learning_rate=params['learning_rate'],
                          margin=params['margin'],
                          neg_rate=params['neg_rate'],
                          neg_rel_rate=params['neg_rel_rate'],
                          regularizer_scale=params['regularizer_scale'],
                          alpha=params['alpha'],
                          beta=params['beta'])
        etranse.train(max_epochs=params['max_epochs'])
        ent_embeddings_a = etranse.get_ent_embeddings_A()
        ent_embeddings_b = etranse.get_ent_embeddings_B()

        result_prob = []
        for i in range(0, len(graph.entity_pairs)):
            distance = abs(
                spatial.distance.cosine(
                    ent_embeddings_a[int(graph.entity_pairs[i][0])],
                    ent_embeddings_b[int(graph.entity_pairs[i][1])]))
            result_prob.append(
                (graph.entity_pairs[i][0], graph.entity_pairs[i][1], distance))
            #logger.info("i: %d, distance: %f true_pairs: %s", i, distance, graph.entity_pairs[i] in true_pairs)

        #Write Embeddings to file
        export_embeddings('erer', str(model), 'ETransE', graph.entityA,
                          ent_embeddings_a)
        export_embeddings('erer', str(model), 'ETransE', graph.entityB,
                          ent_embeddings_b)
        export_result_prob(dataset, 'erer', str(model), 'ETransE',
                           graph.entityA, result_prob, graph.true_pairs,
                           graph.entityB)
        optimal_threshold, max_fscore = get_optimal_threshold(
            result_prob, graph.true_pairs)

        try:
            params['threshold'] = optimal_threshold
            result = pd.MultiIndex.from_tuples([(e1, e2)
                                                for (e1, e2, d) in result_prob
                                                if d <= optimal_threshold])
            log_quality_results(logger, result, graph.true_pairs,
                                len(graph.entity_pairs), params)
        except Exception as e:
            logger.info("Zero Results")
            logger.error(e)

        #Log MAP, MRR and Hits@K
        ir_metrics = InformationRetrievalMetrics(result_prob, graph.true_pairs)
        prec_at_1 = ir_metrics.log_metrics(logger, params)

        etranse.close_tf_session()
        return (max_fscore, prec_at_1)
def extract(input_dir, output_dir, network_data_dir, num_patches, patch_size,
            image_dim, levels, layer_name):
    log = get_logger()
    BATCH_SIZE = 1
    log.info("Walking " + input_dir)
    ex = NewCaffeExtractor.NewCaffeExtractor()
    ex.set_parameters(patch_size, num_patches, levels, image_dim, BATCH_SIZE)
    params = namedtuple("Params", "input_dir output_dir extractor")
    os.path.walk(input_dir, walk, params(input_dir, output_dir, ex))
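os.path.walk only exists in Python 2; a rough replacement for the final call above, assuming the walk callback keeps the visit(arg, dirname, names) signature it is used with here, could look like this sketch.

import os
from collections import namedtuple

Params = namedtuple("Params", "input_dir output_dir extractor")

def walk_tree(input_dir, output_dir, extractor, visit):
    # Emulate os.path.walk(top, visit, arg) on top of os.walk.
    arg = Params(input_dir, output_dir, extractor)
    for dirpath, dirnames, filenames in os.walk(input_dir):
        visit(arg, dirpath, dirnames + filenames)

# walk_tree(input_dir, output_dir, ex, walk)  # instead of os.path.walk(input_dir, walk, params(...))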
Exemple #49
    def _test_erer(self, dataset, er_algo, params):
        model = dataset()
        graph = Graph_ERER(dataset)
        graph_er = graph.get_er_model()

        er_model = er_algo(graph_er,
                           dimension=params['dimension'],
                           learning_rate=params['learning_rate'],
                           margin=params['margin'],
                           regularizer_scale=params['regularizer_scale'],
                           batchSize=params['batchSize'],
                           neg_rate=params['neg_rate'],
                           neg_rel_rate=params['neg_rel_rate'])
        loss = er_model.train(max_epochs=params['epochs'])

        logger = get_logger('RL.Test.erer.ERER.' + str(model) + "." +
                            str(er_model))
        logger.info("Training Complete with loss: %f", loss)

        ent_embeddings = er_model.get_ent_embeddings()
        result_prob = []
        for i in range(0, len(graph_er.entity_pairs)):
            distance = abs(
                spatial.distance.cosine(
                    ent_embeddings[graph_er.entity_pairs[i][0]],
                    ent_embeddings[graph_er.entity_pairs[i][1]]))
            result_prob.append((graph_er.entity_pairs[i][0],
                                graph_er.entity_pairs[i][1], distance))
            #logger.info("i: %d, distance: %f true_pairs: %s", i, distance, graph_er.entity_pairs[i] in graph_er.true_pairs)

        #Write Embeddings to file
        export_embeddings("erer", str(model), str(er_model), graph_er.entity,
                          ent_embeddings)
        export_result_prob(dataset, 'erer', str(model), str(er_model),
                           graph_er.entity, result_prob, graph_er.true_pairs)

        optimal_threshold, max_fscore = get_optimal_threshold(
            result_prob, graph_er.true_pairs)

        try:
            params['threshold'] = optimal_threshold
            result = pd.MultiIndex.from_tuples([(e1, e2)
                                                for (e1, e2, d) in result_prob
                                                if d <= optimal_threshold])
            log_quality_results(logger, result, graph_er.true_pairs,
                                len(graph_er.entity_pairs), params)
        except Exception as e:
            logger.info("Zero Results")
            logger.error(e)

        #Log MAP, MRR and Hits@K
        ir_metrics = InformationRetrievalMetrics(result_prob,
                                                 graph_er.true_pairs)
        ir_metrics.log_metrics(logger)

        er_model.close_tf_session()
        return max_fscore
    def _test_seea(self, dataset, params):
        model = dataset()
        graph = Graph_EAR(dataset)
        logger = get_logger('RL.Test.ear.SEEA.' + str(model))

        seea = SEEA(graph,
                    dimension=params['dimension'],
                    learning_rate=params['learning_rate'],
                    batchSize=params['batchSize'],
                    margin=params['margin'],
                    regularizer_scale=params['regularizer_scale'],
                    neg_rate=params['neg_rate'],
                    neg_rel_rate=params['neg_rel_rate'])

        #Begin SEEA iterations, passing true pairs only to debug the alignments.
        results = seea.seea_iterate(beta=params['beta'],
                                    max_iter=params['max_iter'],
                                    max_epochs=params['max_epochs'])
        fscore = result_pairs = None
        try:
            result_pairs = pd.MultiIndex.from_tuples(results)
            fscore = log_quality_results(logger, result_pairs,
                                         graph.true_pairs,
                                         len(graph.entity_pairs), params)
        except Exception as e:
            logger.error(e)
            logger.info("No Aligned pairs found.")

        ent_embeddings = seea.get_ent_embeddings()
        export_embeddings('ear', str(model), 'SEEA', graph.entity,
                          ent_embeddings)

        result_prob = []
        for (e1, e2) in graph.entity_pairs:
            distance = abs(
                spatial.distance.cosine(ent_embeddings[e1],
                                        ent_embeddings[e2]))
            result_prob.append((e1, e2, distance))
        export_result_prob(dataset, 'ear', str(model), 'SEEA', graph.entity,
                           result_prob, graph.true_pairs)

        try:
            export_false_negatives(dataset, 'ear', str(model), 'SEEA',
                                   graph.entity, result_prob, graph.true_pairs,
                                   result_pairs, graph.entity)
            export_false_positives(dataset, 'ear', str(model), 'SEEA',
                                   graph.entity, result_prob, graph.true_pairs,
                                   result_pairs, graph.entity)
        except Exception as e:
            logger.error(e)

        #Log MAP, MRR and Hits@K
        ir_metrics = InformationRetrievalMetrics(result_prob, graph.true_pairs)
        prec_at_1 = ir_metrics.log_metrics(logger, params)

        seea.close_tf_session()
        return (fscore, prec_at_1)
Exemple #51
 def load(self):
     get_logger().info("Loading patches for " + self.file_name)
     hfile = HDF5File(self.file_name, 'r')
     patches = hfile[self.patch_name]
     feature_dim = patches.shape[1]
     indexes = self.indexes
     num_patches = (indexes[:, 1] - indexes[:, 0]).sum()
     self.patches = np.empty([num_patches, feature_dim])
     self.new_index = np.empty([indexes.shape[0], 2])
     patch_start = n_image = 0
     for iid in indexes:
         n_patches = iid[1] - iid[0]
         self.patches[patch_start:patch_start +
                      n_patches, :] = patches[iid[0]:iid[1], :]
         self.new_index[n_image] = [patch_start, patch_start + n_patches]
         patch_start += n_patches
         n_image += 1
     hfile.close()
     get_logger().info("Loaded " + str(num_patches) + " patches")
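After load() the ranges in self.new_index are re-based onto the contiguous self.patches array; a small usage sketch (the int cast is needed because np.empty allocates float64 by default, and obj stands for an instance of this class):

def iter_image_patches(obj):
    # Yield the block of patches belonging to each image, in order.
    for start, stop in obj.new_index.astype(int):
        yield obj.patches[start:stop]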
def get_arguments():
    log = get_logger()

    parser = ArgumentParser(description='Descriptor checker utility.')
    parser.add_argument("--input-dir",
                        dest="input_dir",
                        help="Directory with HDF5 files.")

    args = parser.parse_args()
    return args
def scrape_category_metadata(disease_db, view_name, cat_name, cat_url):
    common.get_logger().warning(''.join([
            'Scraping the category metadata "', cat_name, '"...']))

    disease_db[view_name][cat_name] = dict()

    tree = get_page_tree(cat_url)
    disease_ids = tree.xpath(XPATH_DISEASE_ID)
    disease_urls_tmp = tree.xpath(XPATH_DISEASE_URL)
    disease_urls = [''.join([DISEASES_DB_BASE_URL, url]) \
                    for url in disease_urls_tmp]
    disease_names = tree.xpath(XPATH_DISEASE_NAME)

    for i in range(0, len(disease_names)):
        disease_name = disease_names[i]
        disease_id = disease_ids[i]
        disease_url = disease_urls[i]
        disease_db[view_name][cat_name][disease_name] = \
            add_disease(disease_db, disease_name, disease_id, disease_url)
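The index loop above assumes the three XPath queries return equally long lists; zipping them makes that assumption explicit and removes the manual indexing (a drop-in replacement for the loop body, same behaviour when the lists line up):

    for disease_name, disease_id, disease_url in zip(disease_names,
                                                     disease_ids,
                                                     disease_urls):
        disease_db[view_name][cat_name][disease_name] = \
            add_disease(disease_db, disease_name, disease_id, disease_url)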
Exemple #54
 def __init__(self):
     '''
     Initialize DB Class
     '''
     self.__host = DB_PARAMS['address']
     self.__username = DB_PARAMS['username']
     self.__password = DB_PARAMS['password']
     self.__db = DB_PARAMS['database']
     self.__logger = get_logger("DBConnection")
     self.connect()
 def __init__(self):
     self.logger = get_logger(__name__)
     self.config = get_config()
     self.bce_access_key_id = self.config['BCE_ACCESS_KEY_ID']
     self.bce_secret_access_key = self.config['BCE_SECRET_ACCESS_KEY']
     self.bce_bos_host = self.config['BCE_BOS_HOST']
     self.bce_sts_host = self.config['BCE_STS_HOST']
     self.bos_src_bucket = self.config['BOS_SRC_BUCKET']
     self.bos_storage_class = self.config['BOS_STORAGE_CLASS']
     self.bos_des_dir = self.config['BOS_DES_DIR']
Exemple #57
def load_settings():
    c = load_configuration()
    ServerState.cluster_nodes = comma_string_to_list(c.get('CoreMQ', 'cluster_nodes', ','))
    ServerState.allowed_replicants = comma_string_to_list(c.get('CoreMQ', 'allowed_replicants', ''))
    ServerState.allowed_replicants.extend(ServerState.cluster_nodes)

    address = c.get('CoreMQ', 'address', '0.0.0.0')
    port = int(c.get('CoreMQ', 'port', '6747'))
    ServerState.listen_address = (address, port)
    ServerState.logger = get_logger(c, 'CoreMQ')
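load_configuration and comma_string_to_list are defined elsewhere; the sketch below is only a plausible stand-in for the list helper, consistent with how it is called above, not the original implementation.

def comma_string_to_list(value):
    # 'node1, node2,node3' -> ['node1', 'node2', 'node3']; '' or ',' -> []
    return [part.strip() for part in value.split(',') if part.strip()]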
Exemple #58
 def __init__(self, selfAddress, partnerAddresses):
     '''
     Add connection object here if exchange is physically located in a separate server
     '''
     cfg = SyncObjConf(logCompactionMinEntries=2147483647,
                       logCompactionMinTime=2147483647)
     super(AdExchange, self).__init__(selfAddress, partnerAddresses, cfg)
     self.__logger = get_logger("AdExchange")
     self.__logger.info("### Starting Ad Exchange...")
     self.__auction = ADEX['auction']
     self.__reserve = ADEX['reserve']
def generate_splitFiles(available_classes, dir_name, files):
    get_logger().info("Folder " + dir_name + " contains " + str(len(files)) +
                      " files")
    folder_name = os.path.basename(os.path.normpath(dir_name))
    # we are only interested in instance-level folders (they contain numbers)
    if not hasNumbers(folder_name):
        return
    basename = nregex.match(folder_name).group(1)
    for f in files:
        #get_logger().info("Parsing " + join(dir_name,f))
        print("Parsing " + join(dir_name, f))
        old_file = join(dir_name, f)
        if os.path.isdir(old_file):
            continue
        elif not is_image(old_file):
            get_logger().info("Skipping " + old_file + ": not an image")
            continue
        if basename in available_classes:
            class_n = available_classes.index(basename)
            print(old_file + " " + str(class_n))
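hasNumbers, nregex and is_image are helpers defined outside this snippet; the definitions below are plausible stand-ins consistent with how they are used above, not the originals.

import re

# Assumed convention: instance folders look like '<class>_<number>' and group(1) is the class name.
nregex = re.compile(r'([A-Za-z_]+?)_?\d+')

def hasNumbers(text):
    return any(ch.isdigit() for ch in text)

def is_image(path):
    return path.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tif'))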
def translate_a_disease(original_disease, language_code='vi'):
    translated_name = translate_one_paragraph(original_disease.name,
                                                language_code)
    common.get_logger().info(''.join(['Name in "', language_code,
        '" language: "', translated_name, '"']))
    translated_summary = translate_one_paragraph(original_disease.summary,
                                                    language_code)
    translated_symptoms = translate_paragraphs(original_disease.symptoms,
                                                language_code)
    translated_phenotypes = translate_paragraphs(original_disease.phenotypes,
                                                    language_code)
    translated_disease = disease.Disease(
        name=translated_name,
        url=original_disease.url,
        disease_id=original_disease.disease_id,
        summary=translated_summary,
        symptoms=translated_symptoms,
        phenotypes=translated_phenotypes
        )
    translated_disease.is_already_scraped = True

    return translated_disease
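A usage sketch for the translator above; disease_db follows the layout used by the scraping helpers in these examples, and the wrapper itself is illustrative rather than taken from the source.

def translate_all_diseases(disease_db, language_code='vi'):
    translated = {}
    for name, original in disease_db[common.ALL_DISEASES_VIEW].items():
        if original.is_already_scraped:
            translated[name] = translate_a_disease(original, language_code)
    return translated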