Example no. 1
    def print_progress(self):
        """ Print a progress bar representing the current state of the tasks.
        """
        tasks_number = len(self.tasks_status)
        # status codes (assumed): 1 = in progress, 2 = done
        done = sum(map(lambda x: x == 2, self.tasks_status))
        working = sum(map(lambda x: x == 1, self.tasks_status))
        utils.print_progress(self.print_control, tasks_number, done, working)
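
Most of the examples in this section call some variant of a print_progress helper from a local utils module. The snippets come from different projects, so the signatures differ (count-based, fraction-based, timestep-based). As a reference point only, here is a minimal sketch of the common count-based form, compatible with calls such as print_progress(i + 1, total, prefix='Progress:', suffix='Complete', bar_length=50); it is an assumption, not any project's actual implementation.

import sys

def print_progress(iteration, total, prefix='', suffix='', bar_length=40):
    # Hypothetical helper: render a one-line textual progress bar in place.
    frac = iteration / float(total) if total else 0.0
    filled = int(round(bar_length * frac))
    bar = '#' * filled + '-' * (bar_length - filled)
    sys.stdout.write('\r%s |%s| %5.1f%% %s' % (prefix, bar, 100.0 * frac, suffix))
    if iteration >= total:
        sys.stdout.write('\n')
    sys.stdout.flush()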
Example no. 2
def import_shapefile_to_db(filepath):
    connection = psycopg2.connect(host=DB_HOSTNAME, database="census_uk", user="******", password="******")
    cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)

    count = 0
    shp = fiona.open(filepath)
    nb_tracts = len(shp)
    for feature in shp:
        type = feature['type']
        id = feature['id']
        geom = shape(feature['geometry'])
        if geom.geom_type == "Polygon":
            geom = MultiPolygon([geom])
        prop = feature['properties']
        geo_code = prop['geo_code']
        label = prop['label']

        query_string = """INSERT INTO tracts
                (geo_code, geom)
                VALUES (%s, ST_GeomFromText(%s, %s))
                ON CONFLICT DO NOTHING;"""
        data = (geo_code, geom.wkt, EPSG)
        cursor.execute(query_string, data)

        count += 1
        if count % 100 == 0:
            utils.print_progress("Done: %.2f" % (100.0 * count / nb_tracts))
        # if count == 10:
        #     break

    connection.commit()
    cursor.close()
    connection.close()
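
Example no. 2 assumes roughly the following imports and constants; the hostname and SRID values below are placeholders, not the original configuration.

import psycopg2
import psycopg2.extras
import fiona
from shapely.geometry import shape, MultiPolygon

DB_HOSTNAME = 'localhost'  # assumed
EPSG = 4326                # assumed SRID passed to ST_GeomFromText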
Example no. 3
def calc_rama(grof, xtcf, btime, etime):
    u = Universe(grof, xtcf)

    resname_query = 'resname GLY or resname VAL or resname PRO'
    atoms = u.selectAtoms(resname_query)
    resname = atoms.resnames()[0] # [0] because .resnames() returns a one-element list
    resid = atoms.resids()[0] # [0] because .resids() returns a one-element list

    phi_query = ('(resname ACE and name C) or '
                 '(resname GLY or resname VAL or resname PRO and '
                 '(name N or name CA or name C))')

    psi_query = ('(resname GLY or resname VAL or resname PRO and (name N or name CA or name C or name NT)) or '
                 '(resname NH2 and name N)')

    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs the unit is nm
    phi = u.selectAtoms(phi_query)
    psi = u.selectAtoms(psi_query)

    for _ in phi.atoms:
        print(_)

    for _ in psi.atoms:
        print(_)


    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break

        yield '{0:.3f}  {1:.3f}  {2}-{3}\n'.format(
            phi.dihedral(), psi.dihedral(), resname, resid)
        utils.print_progress(ts)
Example no. 4
def __gen_hist__(plot_details,verbose=False):
    '''
    Generate a single histogram.
    Only runs on samples with an input file defined,
    i.e. it does not loop through daughters.
    Not really designed to be called by analysers.
    '''
    log.debug( 'in get_hist for %s' % plot_details.sample.name )

    ## add the details from the sample
    #plot_details += plot_details.sample.plot_details

    s = plot_details.sample
    vd = plot_details.var_details
    sel = plot_details.get_selector()
    weights = plot_details.get_weights()
    target_lumi = plot_details.target_lumi

    if not s.is_active(): 
        log.warn('failed to gen hist %s for sample: %s' % (vd.name,s.name) )
        return None
    
    ## create hist 
    h = plot_details.new_hist(s)
    h.samples = s
                
    ## initialise sample
    s.initialise()

    ## return empty hist if no events
    if not s.tree.GetEntries(): return h
    
    s.switch_on_branches()
    
    ## selection
    event_list = sel.select(s) if sel else None
   

    ## initialise variables 
    s.prepare( plot_details )
    
    ## process events
    entries = event_list.GetN() if sel else s.tree.GetEntries()
    for i in range(entries):
        ientry = event_list.GetEntry(i) if sel else i
        s.tree.GetEntry(ientry)
        
        ## calculate event weight
        weight = weights.weight() if weights else 1. 

        ## fill hists
        val = vd.var.calc_vals()[0]
        
        h.Fill(val,weight)
        frac = float(i)/float(entries) if entries else 0.0
        if verbose: utils.print_progress(frac, title='%s: ' % s.name)
    if verbose: utils.clear_progress()
    #s.style_hist(h)
    if target_lumi: s.scale_hist(h,target_lumi)
    return h
Example no. 5
def extend_balancing_classes(X, y, aug_intensity=0.5, counts=None):
    num_classes = 43  # e.g. the 43 GTSRB traffic-sign classes
    _, class_counts = np.unique(y, return_counts=True)
    max_c = max(class_counts)
    total = max_c * num_classes if counts is None else np.sum(counts)
    X_extended = np.empty([0, X.shape[1], X.shape[2], X.shape[3]],
                          dtype=X.dtype)
    y_extended = np.empty([0], dtype=y.dtype)
    print("Extending dataset using augmented data (intensity = {})".format(
        aug_intensity))

    for c, c_count in zip(range(num_classes), class_counts):
        max_c = max_c if counts is None else counts[c]
        X_source = (X[y == c] / 255.).astype(np.float32)
        y_source = y[y == c]
        X_extended = np.append(X_extended, X_source, axis=0)
        for i in range((max_c // c_count) - 1):
            batch_iterator = AugmentedSignsBatchIterator(
                batch_size=X_source.shape[0], p=1.0, intensity=aug_intensity)
            for x_batch, _ in batch_iterator(X_source, y_source):
                X_extended = np.append(X_extended, x_batch, axis=0)
                utils.print_progress(X_extended.shape[0], total)
        batch_iterator = AugmentedSignsBatchIterator(batch_size=max_c %
                                                     c_count,
                                                     p=1.0,
                                                     intensity=aug_intensity)
        for x_batch, _ in batch_iterator(X_source, y_source):
            X_extended = np.append(X_extended, x_batch, axis=0)
            utils.print_progress(X_extended.shape[0], total)
            break
        added = X_extended.shape[0] - y_extended.shape[0]
        y_extended = np.append(y_extended, np.full(added, c, dtype=int))
    return ((X_extended * 255.).astype(np.uint8), y_extended)
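
A usage sketch for Example no. 5; the shapes are assumptions, and AugmentedSignsBatchIterator is defined elsewhere in the original project.

import numpy as np

# 32x32 RGB images with GTSRB-style labels in [0, 42]
X = np.random.randint(0, 256, size=(2000, 32, 32, 3), dtype=np.uint8)
y = np.random.randint(0, 43, size=2000)
X_bal, y_bal = extend_balancing_classes(X, y, aug_intensity=0.75)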
Example no. 6
def load_features(feature_store, is_blind):
    def _make_id(scanId, viewpointId):
        return scanId + '_' + viewpointId

    # if the tsv file for image features is provided
    if feature_store:
        tsv_fieldnames = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov', 'features']
        features = {}
        with open(feature_store, "r") as tsv_in_file:
            print('Reading image features file %s' % feature_store)
            reader = list(csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=tsv_fieldnames))
            total_length = len(reader)

            print('Loading image features ..')
            for i, item in enumerate(reader):
                image_h = int(item['image_h'])
                image_w = int(item['image_w'])
                vfov = int(item['vfov'])
                long_id = _make_id(item['scanId'], item['viewpointId'])
                features[long_id] = np.frombuffer(base64.b64decode(item['features']),
                                                       dtype=np.float32).reshape((36, 2048))
                if is_blind:
                    features[long_id] = np.random.rand(36, 2048)

                print_progress(i + 1, total_length, prefix='Progress:',
                               suffix='Complete', bar_length=50)
    else:
        print('Image features not provided')
        features = None
        image_w = 640
        image_h = 480
        vfov = 60
    return features, (image_w, image_h, vfov)
Example no. 7
def image_curation(num_threads=24):
    subsets = ['train', 'val']

    for subset in subsets:

        subdir = join(base_path, subset)

        with open(join(base_path, subset, 'list.txt')) as f:
            videos = f.readlines()

        if debug:
            videos = videos[:20]
        n_videos = len(videos)
        """
        for video in videos:
            crop_video(subdir, subset, video.replace("\n", ""))
        """

        with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
            fs = [
                executor.submit(crop_video, subdir, subset,
                                video.replace("\n", "")) for video in videos
            ]
            for i, f in enumerate(futures.as_completed(fs)):
                utils.print_progress(i,
                                     n_videos,
                                     prefix=subset,
                                     suffix='Done ',
                                     barLength=40)
Example no. 8
def count_interactions(A):
    logger.debug('loading {0}'.format(A.grof))
    univ = Universe(A.grof)
    logger.debug('loaded {0}'.format(A.grof))

    pro_atoms = univ.selectAtoms(
        'protein and not resname ACE and not resname NH2')
    pl = pro_atoms.residues.numberOfResidues()
    # +1: for missing resname ACE, such that it's easier to proceed in the next
    # step

    logger.debug('loading {0}, {1}'.format(A.grof, A.xtcf))
    u = Universe(A.grof, A.xtcf)
    logger.debug('loaded {0}, {1}'.format(A.grof, A.xtcf))

    # Just for reference to the content of the query when the code was first
    # written and used
    # query = ('(resname PRO and (name CB or name CG or name CD)) or'
    #          '(resname VAL and (name CG1 or name CG2)) or'
    #          '(resname GLY and name CA) or'
    #          '(resname ALA and name CB)')

    query = A.query
    atoms = u.selectAtoms(query)
    logger.info('Number of atoms selected: {0}'.format(atoms.numberOfAtoms()))

    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs
    # the unit is nm
    cutoff = A.cutoff * 10
    nres_away = A.nres_away
    btime = A.btime
    etime = A.etime
    nframe = 0
    unun_map = None
    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break

        nframe += 1
        map_ = np.zeros((pl + 1, pl + 1))  # map for a single frame
        for i, ai in enumerate(atoms):
            ai_resid = ai.resid
            for j, aj in enumerate(atoms):
                aj_resid = aj.resid
                # to avoid counting the same pair twice,
                # the two resids cannot be neighbors
                if i < j and aj_resid - ai_resid >= nres_away:
                    d = np.linalg.norm(ai.pos - aj.pos)
                    if d <= cutoff:
                        # -1: resid in MDAnalysis starts from 1
                        map_[ai_resid - 1][aj_resid - 1] += 1
        if unun_map is None:
            unun_map = map_
        else:
            unun_map = unun_map + map_
        utils.print_progress(ts)
    sys.stdout.write("\n")
    return unun_map / float(nframe)
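
The per-frame double loop above is O(N^2) in pure Python. For reference, MDAnalysis can compute all pairwise distances in one vectorized call; this sketch uses the modern positions attribute rather than the older .pos, so it is an adaptation, not the original code.

from MDAnalysis.analysis.distances import distance_array

# All pairwise distances between the selected atoms for the current frame,
# equivalent to the inner double loop over atom pairs.
dists = distance_array(atoms.positions, atoms.positions)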
Example no. 9
def image_curation(num_threads=24):
    subsets = os.listdir(train_base_path)

    for subset in subsets:
        subdir = join(train_base_path, subset)
        videos = os.listdir(subdir)

        if debug:
            videos = videos[:1]
        n_videos = len(videos)
        """
        for video in videos:
            crop_video(subdir, subset, video)
        """

        with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
            fs = [
                executor.submit(crop_video, subdir, subset, video)
                for video in videos
            ]
            for i, f in enumerate(futures.as_completed(fs)):
                utils.print_progress(i,
                                     n_videos,
                                     prefix=subset,
                                     suffix='Done ',
                                     barLength=40)
Example no. 10
def count_interactions(grof, xtcf, btime, etime, cutoff):
    cutoff = cutoff * 10  # * 10: convert from nm to angstrom to work with MDAnalysis
    u = Universe(grof, xtcf)
    query = ('(resname PRO and (name CB or name CG or name CD)) or'
             '(resname VAL and (name CG1 or name CG2)) or'
             '(resname GLY and name CA) or'
             '(resname ALA and name CB)')
    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs the unit is nm
    atoms = u.selectAtoms(query)
    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break

        numcount = 0
        for i, ai in enumerate(atoms):
            for j, aj in enumerate(atoms):
                # to avoid counting the same pair twice,
                # the two resids cannot be neighbors
                if i < j and abs(ai.resid - aj.resid) >= 2:
                    d = np.linalg.norm(ai.pos - aj.pos)
                    if d <= cutoff:
                        numcount += 1
        yield '{0:10.0f}{1:8d}\n'.format(ts.time, numcount)
        utils.print_progress(ts)
Example no. 11
def calc_dihedral(grof, xtcf, btime, etime):
    # xtcf defaults to None, so this still works if only a gro file is passed
    univer = Universe(grof, xtcf)

    tets = select_dihedrals(univer)  # there should be a better name for tet

    # Write headers, hdr: header
    hdrs = []
    for k, tet in enumerate(tets):
        # ca1 + ca2 + (NO. peptide-bond)
        hdr = (utils.swap_aa_name(tet[0].resname) +
               utils.swap_aa_name(tet[-1].resname) + "{0:02d}".format(k + 1))
        # hdrs.append("{0:8s}".format(hdr))
        hdrs.append("{0:4s}".format(hdr))
    yield '#{0:8s}{1}\n'.format('t(ps)', ' '.join(hdrs))

    if not xtcf:
        yield calc_dih(tets)
    else:
        for ts in univer.trajectory:
            if btime > ts.time:
                continue
            if etime > 0 and etime < ts.time:
                break

            res = calc_dih(tets, ts.time)
            yield res
            utils.print_progress(ts)
Example no. 12
def ISTA(fx, gx, gradf, proxg, params):
    method_name = 'ISTA'
    print_start_message(method_name)

    # Parameter setup
    alpha = 1 / params['Lips']

    info = []

    x = params['x0']

    for k in range(params['maxit']):
        # Perform proximal gradient step
        x = proxg(x - alpha * gradf(x), alpha)

        # Record convergence
        F_x = fx(x) + gx(x)
        info.append(F_x)

        # Early stopping
        if stop({'F_x': F_x}, params):
            print('Early stop')
            return x, info

        if k % params['iter_print'] == 0:
            print_progress(k, params['maxit'], F_x, fx(x), gx(x))
    return x, info
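
Example no. 12 calls a stop helper that is not shown. A plausible sketch, assuming the tolerance is passed through params (the key name here is an assumption):

def stop(state, params):
    # Hypothetical early-stopping test: stop once the composite objective
    # F(x) = f(x) + g(x) falls below a user-supplied tolerance.
    return state['F_x'] <= params.get('stopping_criterion', float('-inf'))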
Example no. 13
def calc_dihedral(grof, xtcf, btime, etime):
    # xtcf defaults to None, so this still works if only a gro file is passed
    univer = Universe(grof, xtcf)
    
    tets = select_dihedrals(univer)     # there should be a better name for tet

    # Write headers, hdr: header
    hdrs = []
    for k, tet in enumerate(tets):
        # ca1 + ca2 + (NO. peptide-bond)
        hdr = (utils.swap_aa_name(tet[0].resname) +
               utils.swap_aa_name(tet[-1].resname) + 
               "{0:02d}".format(k+1))
        # hdrs.append("{0:8s}".format(hdr))
        hdrs.append("{0:4s}".format(hdr))
    yield '#{0:8s}{1}\n'.format('t(ps)', ' '.join(hdrs))

    if not xtcf:
        yield calc_dih(tets)
    else:
        for ts in univer.trajectory:
            if btime > ts.time:
                continue
            if etime > 0 and etime < ts.time:
                break

            res = calc_dih(tets, ts.time)
            yield res
            utils.print_progress(ts)
Example no. 14
def image_curation(num_threads=24):

    subset_dirs = sorted(
        glob.glob(join(base_path, TRAIN_SUBSET_PREFIX + '*') + "[!zip]"))
    #subset_dirs.append(join(base_path, TEST_SUBSET_PREFIX))

    for subdir in subset_dirs:

        videos = sorted(os.listdir(join(subdir, 'videos')))

        if debug:
            videos = videos[:1]
        n_videos = len(videos)

        subset = subdir.split('/')[-1]

        #for video in videos:
        #    crop_video(subdir, subset, video)

        with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
            fs = [
                executor.submit(crop_video, subdir, subset, video)
                for video in videos
            ]
            for i, f in enumerate(futures.as_completed(fs)):
                utils.print_progress(i,
                                     n_videos,
                                     prefix=subset,
                                     suffix='Done ',
                                     barLength=40)
Example no. 15
def main(args):
    """Main function for the testing pipeline

    :args: commandline arguments
    :returns: None

    """
    ##########################################################################
    #                             Basic settings                             #
    ##########################################################################
    exp_dir = 'experiments'
    model_dir = os.path.join(exp_dir, 'models')
    model_file = os.path.join(model_dir, 'best.pth')
    val_dataset = dataset.NCovDataset('data/', stage='val')
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False, num_workers=11,
        drop_last=False)

    cov_net = model.COVNet(n_classes=args.n_classes)
    if torch.cuda.is_available():
        cov_net.cuda()

    state = torch.load(model_file)
    cov_net.load_state_dict(state.state_dict())

    with torch.no_grad():
        val_loss, metric_collects = evaluate_model(cov_net, val_loader)
    prefix = '******Evaluate******'
    utils.print_progress(mean_loss=val_loss, metric_collects=metric_collects,
                         prefix=prefix)
Example no. 16
def get_atom_and_bond_features(mols, mol_ids, dist_matrices):
    atom_features, bond_features = [], []
    bond_idx, atom_to_m_id, bond_to_m_id = [], [], []

    print('Get atom and bond features.')
    for it, m_name in enumerate(mols):
        print_progress(it, C.N_MOLS)
        m_id, mol = mol_ids[m_name], mols[m_name]
        dist_matrix = dist_matrices[m_name]
        n_atoms, n_bonds = mol.GetNumAtoms(), mol.GetNumBonds()

        atom_features.append(get_atom_features(mol, dist_matrix))

        e_feats, b_idx = get_bond_features(mol, dist_matrix)
        bond_features.append(e_feats)
        bond_idx.append(b_idx)

        atom_to_m_id.append(np.repeat(m_id, n_atoms))
        bond_to_m_id.append(np.repeat(m_id, n_bonds))

    atom_features = pd.DataFrame(np.concatenate(atom_features),
                                 columns=C.ATOM_FEATS)
    bond_features = pd.DataFrame(np.concatenate(bond_features),
                                 columns=C.BOND_FEATS)
    bond_idx = np.concatenate(bond_idx)
    bond_features['idx_0'] = bond_idx[:, 0]
    bond_features['idx_1'] = bond_idx[:, 1]
    atom_features['molecule_id'] = np.concatenate(atom_to_m_id)
    bond_features['molecule_id'] = np.concatenate(bond_to_m_id)

    return atom_features, bond_features
Example no. 17
def load_features(feature_store, whether_img_feat):
    def _make_id(scanId, viewpointId):
        return scanId + '_' + viewpointId

    # if the tsv file for image features is provided
    if feature_store and whether_img_feat:
        tsv_fieldnames = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov', 'features']
        features = {}
        with open(feature_store, "r") as tsv_in_file:
            print('Reading image features file %s' % feature_store)
            reader = list(csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=tsv_fieldnames))
            #reader = reader[1:]
            total_length = len(reader)

            print('Loading image features ..')
            for i, item in enumerate(reader):
                image_h = int(item['image_h'])
                image_w = int(item['image_w'])
                vfov = int(item['vfov'])
                long_id = _make_id(item['scanId'], item['viewpointId'])
                features[long_id] = np.frombuffer(base64.b64decode(item['features']),
                                                       dtype=np.float32).reshape((36, 2048))
                print_progress(i + 1, total_length, prefix='Progress:',
                               suffix='Complete', bar_length=50)
            #features['17DRP5sb8fy_10c252c90fa24ef3b698c6f54d984c5c'] = np.zeros((36, 152)
    else:
        print('Image features not provided')
        features = None
        image_w = 640
        image_h = 480
        vfov = 60
    return features, (image_w, image_h, vfov)
Example no. 18
def main():
    raw_list = csv_to_list(csv_file)[:100]
    total_len = len(raw_list)
    counter = 0
    result_dict = dict()
    print "Commencing Web Scraping..."
    start_time = time.time()
    for raw_link in raw_list:
        try:
            raw_link = raw_link[0]
            whois_link = "http://www.whois.com/whois/" + raw_link
            ipaddress_link = "http://" + raw_link + ".ipaddress.com/"
            whois_soup = link_to_lxmlsoup(whois_link)
            ipaddress_soup = link_to_lxmlsoup(ipaddress_link)
            result_dict.setdefault('Raw Link', []).append(str(raw_link))
            result_dict = whois_parser(whois_soup, result_dict)
            result_dict = ipaddress_parser(ipaddress_soup, result_dict)
            counter, total_len = print_counter(counter, total_len)
            if counter % 400 == 0:
                print "Commencing 30 Second Sleep after 400 iterations"
                time.sleep(30)
            time_elapsed = time.time() - start_time
            print_progress(time_elapsed, counter, total_len)
        except:
            dict_to_json(result_dict, 'output.json')
            dict_to_csv(result_dict, 'output.csv')
            print "Unexpected Error", sys.exc_info()[0]
            raise
    dict_to_json(result_dict, 'output.json')
    dict_to_csv(result_dict, 'output.csv')
Example no. 19
def read_data(data_dir, max_files=MAX_NUM_FILES):
    # prepare
    data = []

    # sort files for reproducibility
    files = os.listdir(data_dir)
    files = sorted(files)
    random.Random(SHUFFLE_SEED).shuffle(files)

    if max_files > 0:
        files = files[:int(max_files)]

    print('Reading files...')
    for index, file_name in enumerate(files):
        if not index % PROGRESS_ITER:
            print_progress(index, files)
        file_path = os.path.join(data_dir, file_name)
        if os.path.isfile(file_path):
            # read array and parse all elements to float, keep NaNs as nan value
            contents = pd.read_csv(file_path, delimiter='|').values
            patient = PatientData(sequence_x=contents[:, :-1],
                                  sequence_y=contents[:, -1:])
            data.append(patient)

    print_progress(len(files), files)
    return data
Example no. 20
def count_interactions(grof, xtcf, btime, etime, cutoff):
    cutoff = cutoff * 10 # * 10: convert from nm to angstrom to work with MDAnalysis
    u = Universe(grof, xtcf)
    query = ('(resname PRO and (name CB or name CG or name CD)) or'
             '(resname VAL and (name CG1 or name CG2)) or'
             '(resname GLY and name CA) or'
             '(resname ALA and name CB)')
    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs the unit is nm
    atoms = u.selectAtoms(query)
    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break

        numcount = 0
        for i, ai in enumerate(atoms):
            for j, aj in enumerate(atoms):
                # to avoid counting the same pair twice,
                # the two resids cannot be neighbors
                if i < j and abs(ai.resid - aj.resid) >= 2: 
                    d = np.linalg.norm(ai.pos - aj.pos)
                    if d <= cutoff:
                        numcount += 1
        yield '{0:10.0f}{1:8d}\n'.format(ts.time, numcount)
        utils.print_progress(ts)
Example no. 21
    def pretrain(self, env):
        test_s, test_q_c, test_w = self._get_batch(env, config.PRETRAIN_BATCH)

        last_loss = 9999.
        for i in range(config.PRETRAIN_EPOCHS):
            utils.print_progress(i, config.PRETRAIN_EPOCHS, step=100)

            # print loss
            if utils.is_time(i, 100):
                lr = config.PRETRAIN_LR / (1 + i / 100)
                self.model.set_lr(lr)

                q = self.model(test_s)
                loss_c = self.model.get_loss_c(q, test_q_c, test_w)
                print("\nLoss: {:.4e} LR: {:.2e}".format(loss_c.data.item(), lr))

                if last_loss <= loss_c:     # stop early
                    break

                last_loss = loss_c

            s, q_c, w = self._get_batch(env, config.PRETRAIN_BATCH)
            self.model.train_c(s, q_c, w)

        self.model_.copy_weights(self.model, rho=1.0)
Example no. 22
def main(args_dict):
    # Extract configuration
    MK = args_dict['MK']

    # Construct Robust Bayesian regression model
    sigma = 2 * np.ones(1)
    bounder = robustbayesregr.Bounder()
    splitter = robustbayesregr.Splitter()
    proposal = robustbayesregr.IsotropicGaussian(1, sigma)
    np.random.seed(0)
    x, y = robustbayesregr.generate_data(1000)
    target = robustbayesregr.CauchyRegression(x, y, sigma)

    # Obtain MK samples (and their corresponding MAP values) using A* sampling implementation
    samples = np.empty(MK)
    MAPs = []
    for i in range(MK):
        stream = astar.astar_sampling_iterator(target, proposal, bounder,
                                               splitter)
        X, G = next(stream)
        samples[i] = X
        MAPs.append(G[0] - EULER)
        if i % 1 == 0:
            print_progress('Sampled %d / %d' % (i + 1, MK))
    print('')
    lnZ = float(np.log(target.z()))

    # Dump true ln(Z) and MAP values to JSON file
    data = {'lnZ': lnZ, 'MAPs': MAPs}
    savepath = 'data/astar_rbr_MK%d.json' % (MK)
    json_dump(data, savepath, indent=None)
    print('Saved %d samples to %s' % (len(MAPs), savepath))
Example no. 23
def estimate_MSE_vs_alpha(transform, Ms, alphas, K):
    # Without loss of generality
    Z = 1
    tZ = transform(Z)
    
    # Estimate MSEs by constructing estimators K times
    MSEs = np.empty((len(Ms), len(alphas)))
    MSEs_stdev = np.empty((len(Ms), len(alphas)))
    for Mi, M in enumerate(Ms):
        # Compute means (K x alphas) in a loop, as otherwise
        # this runs out of memory with K = 100,000.
        means = np.empty((K, len(alphas)))
        for ai, alpha in enumerate(alphas):
            Ws = np.power(np.random.exponential(1.0, size=(K, M)), alpha)   # (K, M)
            means[:, ai] = np.mean(Ws, axis=1)
            print_progress('M = %d: done %.0f%%' % (M, 100.0 * (ai+1) / len(alphas)))
        print('')

        g = np.power(gamma(1.0 + alphas), 1.0 / alphas)         # (alphas)
        tZ_hats = transform(g * np.power(means, -1.0/alphas))   # (K, alphas)
        SEs = (tZ_hats - tZ) ** 2                               # (K, alphas)
        MSEs[Mi] = np.mean(SEs, axis=0)                         # (alphas)
        MSEs_stdev[Mi] = np.std(SEs, axis=0) / np.sqrt(K)       # (alphas)

    return MSEs, MSEs_stdev
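
A usage sketch for Example no. 23; the transform and grid values are assumptions, and gamma is presumably scipy.special.gamma in the original scope.

import numpy as np

MSEs, MSEs_stdev = estimate_MSE_vs_alpha(
    transform=np.log,                  # estimate MSE of ln(Z) rather than Z
    Ms=[10, 100],                      # numbers of samples per estimator
    alphas=np.linspace(0.1, 1.0, 10),  # power exponents to compare
    K=1000)                            # Monte Carlo repetitions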
Example no. 24
    def run(self, content_urls):
        responses = []
        total = len(content_urls)
        count = 0

        start = time.time()
        for url in content_urls:
            rrl = utils.RequestRateLimiterFactory(self.request_rate)()
            if count == 0:
                print(f'[WARNING] rate_limited _call_api(): max_per_Second:{self.request_rate}')

            try:
                request_url = f'{url}{self.query_params}' if self.query_params is not None else url
                res = rrl.make_rate_limited_request(request_url, self.request_verb, headers=self.headers)
                parsed_res = self.res_callback(res=res, url=url, remaining_count=total-count, rrl=rrl)
            except Exception as e:
                # TODO: could provide 'skip' vs 'abandon' option for exceptions
                print(f'[WARNING] Exception thrown - skipping {url}')
                print(e)
                parsed_res = []

            count += 1
            responses.append(parsed_res)
            utils.print_progress(count, total, urlparse(url).hostname)

        end = time.time()
        print(f'{sum([len(res) for res in responses])} items scraped from {count} urls in {end-start:.2f}s.')

        return responses
Example no. 25
def ista(fx, gx, gradf, proxg, params):
    method_name = 'ISTA'
    print_start_message(method_name)

    tic = time.time()

    # Parameter setup
    lmbd = params['lambda']
    alpha = 1 / params['Lips']

    X = params['x0']

    run_details = {'X_final': None, 'conv': np.zeros(params['maxit'] + 1)}
    run_details['conv'][0] = fx(params['x0']) + lmbd * gx(params['x0'])

    for k in range(1, params['maxit'] + 1):
        # Perform proximal gradient step
        X = proxg(X - alpha * gradf(X), alpha * lmbd)
        # Record convergence
        run_details['conv'][k] = fx(X) + lmbd * gx(X)

        if k % params['iter_print'] == 0:
            print_progress(k, params['maxit'], run_details['conv'][k], fx(X),
                           gx(X))

    run_details['X_final'] = X
    print_end_message(method_name, time.time() - tic)
    return run_details
Example no. 26
def get_atom_and_bond_features(mols, mol_ids, dist_matrices):       # mol_ids: {}
    atom_features, bond_features = [], []
    bond_idx, atom_to_m_id, bond_to_m_id = [], [], []
    print('Get atom and bond features.')
    for it, m_name in enumerate(mols):
        print_progress(it, C.N_MOLS)
        m_id, mol = mol_ids[m_name], mols[m_name]                   # m_id: int
        dist_matrix = dist_matrices[m_name]                         # distance matrix of this molecule
        n_atoms, n_bonds = mol.GetNumAtoms(), mol.GetNumBonds()     # atom and bond counts from the
                                                                    # mol structure
        atom_features.append(get_atom_features(mol, dist_matrix))   # atom features for each molecule

        e_feats, b_idx = get_bond_features(mol, dist_matrix)        # bond features for each molecule
        bond_features.append(e_feats)
        bond_idx.append(b_idx)                                      # b_idx: array(n_bonds, 2)

        atom_to_m_id.append(np.repeat(m_id, n_atoms))               # molecule id repeated once per atom
        bond_to_m_id.append(np.repeat(m_id, n_bonds))               # molecule id repeated once per bond
    atom_features = pd.DataFrame(
        np.concatenate(atom_features), columns=C.ATOM_FEATS)   # atom_features: [np.array(n_atoms*atom_feats), ...]
    bond_features = pd.DataFrame(
        np.concatenate(bond_features), columns=C.BOND_FEATS)   # bond_features: [np.array(n_bonds*bond_feats), ...]
    bond_idx = np.concatenate(bond_idx)                        # bond_idx: [np.array(n_bonds*2), ...]
    bond_features['idx_0'] = bond_idx[:, 0]                    # indices of the two bonded atoms
    bond_features['idx_1'] = bond_idx[:, 1]
    atom_features['molecule_id'] = np.concatenate(atom_to_m_id)   # add molecule_id column to atom_features
    bond_features['molecule_id'] = np.concatenate(bond_to_m_id)   # add molecule_id column to bond_features

    return atom_features, bond_features                           # return both DataFrames
Example no. 27
    def __init__(
        self,
        opts,
        features,
        img_spec,
        batch_size=64,
        seed=10,
        splits=["train"],
        tokenizer=None,
    ):
        self.env = PanoEnvBatch(features, img_spec, batch_size=batch_size)
        self.data = []
        self.scans = []
        self.opts = opts

        print("Loading {} dataset".format(splits[0]))

        json_data = load_datasets(splits)
        total_length = len(json_data)

        # iteratively load data into system memory
        for i, item in enumerate(json_data):

            if not is_experiment() and i >= 20:
                break  # if this is in developing mode, load only a small amount of data

            # Split multiple instructions into separate entries
            for j, instr in enumerate(item["instructions"]):
                self.scans.append(item["scan"])
                new_item = dict(item)
                new_item["instr_id"] = "%s_%d" % (item["path_id"], j)
                new_item["instructions"] = instr
                if tokenizer:
                    if (
                            "instr_encoding" not in item
                    ):  # we may already include 'instr_encoding' when generating synthetic instructions
                        new_item["instr_encoding"] = tokenizer.encode_sentence(
                            instr)
                    else:
                        new_item["instr_encoding"] = item["instr_encoding"]
                self.data.append(new_item)
            print_progress(
                i + 1,
                total_length,
                prefix="Progress:",
                suffix="Complete",
                bar_length=50,
            )

        self.scans = set(self.scans)
        self.splits = splits
        self.seed = seed
        random.seed(self.seed)
        random.shuffle(self.data)
        self.ix = 0
        self.batch_size = batch_size
        self._load_nav_graphs()
        print("R2RBatch loaded with %d instructions, using splits: %s" %
              (len(self.data), ",".join(splits)))
Example no. 28
def __gen_hist__(plot_details, verbose=False):
    '''
    Generate a single histogram.
    Only runs on samples with an input file defined,
    i.e. it does not loop through daughters.
    Not really designed to be called by analysers.
    '''
    log.debug('in get_hist for %s' % plot_details.sample.name)

    ## add the details from the sample
    #plot_details += plot_details.sample.plot_details

    s = plot_details.sample
    vd = plot_details.var_details
    sel = plot_details.get_selector()
    weights = plot_details.get_weights()
    target_lumi = plot_details.target_lumi

    if not s.is_active():
        log.warn('failed to gen hist %s for sample: %s' % (vd.name, s.name))
        return None

    ## create hist
    h = plot_details.new_hist(s)
    h.samples = s

    ## initialise sample
    s.initialise()

    ## return empty hist if no events
    if not s.tree.GetEntries(): return h

    s.switch_on_branches()

    ## selection
    event_list = sel.select(s) if sel else None

    ## initialise variables
    s.prepare(plot_details)

    ## process events
    entries = event_list.GetN() if sel else s.tree.GetEntries()
    for i in range(entries):
        ientry = event_list.GetEntry(i) if sel else i
        s.tree.GetEntry(ientry)

        ## calculate event weight
        weight = weights.weight() if weights else 1.

        ## fill hists
        val = vd.var.calc_vals()[0]

        h.Fill(val, weight)
        frac = float(i) / float(entries) if entries else 0.0
        if verbose: utils.print_progress(frac, title='%s: ' % s.name)
    if verbose: utils.clear_progress()
    #s.style_hist(h)
    if target_lumi: s.scale_hist(h, target_lumi)
    return h
Example no. 29
def count_interactions(A):
    logger.debug('loading {0}'.format(A.grof))
    univ = Universe(A.grof)
    logger.debug('loaded {0}'.format(A.grof))

    pro_atoms = univ.selectAtoms('protein and not resname ACE and not resname NH2')
    pl = pro_atoms.residues.numberOfResidues()
    # +1: for missing resname ACE, such that it's easier to proceed in the next
    # step

    logger.debug('loading {0}, {1}'.format(A.grof, A.xtcf))
    u = Universe(A.grof, A.xtcf)
    logger.debug('loaded {0}, {1}'.format(A.grof, A.xtcf))

    # Just for reference to the content of the query when the code was first
    # written and used
    # query = ('(resname PRO and (name CB or name CG or name CD)) or'
    #          '(resname VAL and (name CG1 or name CG2)) or'
    #          '(resname GLY and name CA) or'
    #          '(resname ALA and name CB)')

    query = A.query
    atoms = u.selectAtoms(query)
    logger.info('Number of atoms selected: {0}'.format(atoms.numberOfAtoms()))

    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs
    # the unit is nm
    cutoff = A.cutoff * 10
    nres_away = A.nres_away
    btime = A.btime
    etime = A.etime
    nframe = 0
    unun_map = None
    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break

        nframe += 1
        map_ = np.zeros((pl+1, pl+1))                   # map for a single frame
        for i, ai in enumerate(atoms):
            ai_resid = ai.resid
            for j, aj in enumerate(atoms):
                aj_resid = aj.resid
                # to avoid counting the same pair twice,
                # the two resids cannot be neighbors
                if i < j and aj_resid - ai_resid >= nres_away:
                    d = np.linalg.norm(ai.pos - aj.pos)
                    if d <= cutoff:
                        # -1: resid in MDAnalysis starts from 1
                        map_[ai_resid-1][aj_resid-1] += 1
        if unun_map is None:
            unun_map = map_
        else:
            unun_map = unun_map + map_
        utils.print_progress(ts)
    sys.stdout.write("\n")
    return unun_map / float(nframe)
Example no. 30
def generate_svg_stack(dataset_name, X, classIds, n, data_size):
    outfile = '../data/imgs/{}.svg'.format(dataset_name)
    with open(outfile, "w") as svgFile:
        svgFile.write(svgMetaData)
        for i in range(n):
            utils.print_progress(i, n)
            figData = generate_figure_data(X[i], classIds[i], data_size)
            svgFile.write(svgImgTag.format(i, i, figData))
        svgFile.write("</svg>")
    def train(self, inputs, targets, lr=1, batch_size=30, epochs=100, plot=False, kernel='linear'):

        self.batch_size = batch_size
        # init the kernel
        self.set_kernel(kernel)

        # set optimization method (Gradient Descent)
        self.optimization = tf.train.GradientDescentOptimizer(lr)
        self.training_step = self.optimization.minimize(self.loss)
        self.init = tf.global_variables_initializer()
        self.session.run(self.init)

        # set training data
        train_inputs, train_target = inputs, targets

        # performance tracking
        train_loss_result, train_accuracy_result = [], []

        # for each epoch
        for i in range(epochs):

            # generate random indexes for each batch
            batch_index = np.random.choice(len(train_inputs), size=batch_size)
            self.session.run(self.training_step, feed_dict={self.inputs: train_inputs[batch_index],
                                                            self.target: train_target[:, batch_index]})
            # if plotting, record every epoch
            if plot:
                # record accuracy
                train_accuracy, train_loss = self.generate_step_tracking_data(
                    train_inputs[batch_index], train_target[:, batch_index])
                train_accuracy_result.append(train_accuracy)
                train_loss_result.append(train_loss)

            if (i+1) % (epochs / 5) == 0:
                # if not plotting, get intermittent accuracy and loss
                if not plot:
                    # record accuracy
                    train_accuracy, train_loss = self.generate_step_tracking_data(
                        train_inputs[batch_index], train_target[:, batch_index])
                utl.print_progress(i, epochs, train_loss, train_accuracy)

        # plot results
        if plot:
            if self.features != 2:
                print('Plotting only supported for 2 feature data sets... skipping output')
            else:
                utl.plot_loss(train_loss_result)
                utl.plot_accuracy(train_accuracy_result)
                grid = utl.generate_grid(train_inputs)
                grid_predictions = self.session.run(self.prediction, feed_dict={self.inputs: train_inputs[batch_index],
                                                                                self.target: train_target[:, batch_index],
                                                                                self.grid: grid})
                # plot the result grid
                utl.plot_result(grid_predictions, inputs, targets)

        # commit data points for the last support vectors used
        self.support_vector_data = [train_inputs[batch_index], train_target[:, batch_index]]
Example no. 32
def evaluate_checkpoint(P, postfix, data, word2id, checkpoint_fname, epoch,
                        device):
    """
  
  """
    # Check for checkpoints
    _ = load_checkpoint(P, model, opt, device, checkpoint_fname)

    response_filename = "{}response_str_{}.txt".format(P.EVAL_DIR, postfix)
    if os.path.isfile(response_filename):
        print("Skipping {}, it does already exist.".format(response_filename))
        return
    saver_response_str = sentences_saver(response_filename)

    print()
    total_decoder_loss = 0
    total_saliency_loss = 0

    start_time = time()
    for batch_num, batch in enumerate(data):
        # Get batch
        input, target, templates, target_saliencies = unpack_batch(
            batch, device)

        saliency, response = model(input, target, templates)

        decoder_target = [t[1:] for t in target]  # Cut <BOS> from target
        decoder_loss = torch.stack([
            decoder_loss_fn(res, tar)
            for res, tar in zip(response, decoder_target)
        ]).mean()

        if P.USE_BILINEAR:
            # Only when the bilinear is used, there is a sailency loss.
            saliency_loss = torch.stack([
                saliency_loss_fn(sal, true_sal)
                for sal, true_sal in zip(saliency, target_saliencies)
            ]).mean()
            total_saliency_loss += saliency_loss.item()
        total_decoder_loss += decoder_loss.item()

        for inp, templ, targ in zip(input, templates, target):
            response, _ = model.respond(device,
                                        word2id, [inp], [templ],
                                        max_length=50)

            # Write the results to txt files
            saver_response_str.store_sentence(word2id.id2string(response))

        print_progress("Evaluating: ", P, epoch - 1, batch_num, len(data),
                       total_saliency_loss / (batch_num + 1),
                       total_decoder_loss / (batch_num + 1), start_time)

    print()
    saver_response_str.write_to_file()
Example no. 33
    def run(self, filename):
        firmwarefilename = filename
        name = firmwarefilename.replace(".new.dat", "")
        transferfilename = name + ".transfer.list"
        if not os.path.exists(transferfilename):
            print(f"Couldn't find needed {transferfilename}.")
            exit(0)
        with open(transferfilename, 'r') as tfr:
            with open(firmwarefilename, 'rb') as qr:
                version = int(tfr.readline().replace("\n", ""))
                if version > 3:
                    print(f"Error, version {str(version)} not supported.")
                    exit(0)
                with open(name + ".bin", "wb") as qw:
                    totalblocks = int(tfr.readline().replace("\n", ""))
                    blocksize = 4096
                    buffersize = 0x200000
                    ip = tfr.readline()
                    command = ip.split(" ")[0]
                    ip = ip.split(" ")[1]
                    values = ip.split(",")
                    print_progress(0,
                                   100,
                                   prefix='Progress:',
                                   suffix='Complete',
                                   bar_length=50)
                    if command == "new":
                        count = int(values[0])
                        old = 0
                        for i in range(0, count // 2):
                            start = int(values[1 + (i * 2)])
                            end = int(values[2 + (i * 2)])
                            length = (end - start) * blocksize
                            for pos in range(0, (blocksize * start), 4096):
                                qw.write(b"\x00" * 4096)
                            total = length
                            while length > 0:
                                size = buffersize
                                if size > length:
                                    size = length
                                buffer = qr.read(size)
                                qw.write(buffer)
                                length -= size
                                prog = int(
                                    float(i) / float(total) * float(100))
                                if prog > old:
                                    print_progress(prog,
                                                   100,
                                                   prefix='Progress:',
                                                   suffix='Complete',
                                                   bar_length=50)
                                    old = prog
                    elif command == "erase":
                        pass
Example no. 34
def main():
    # get all image names
    images = glob.glob('{}/*.jpg'.format(PATH_TO_IMAGES))

    for i, image in enumerate(images):
        rotate_image(image, 'right')
        rotate_image(image, 'left')
        rotate_image(image)
        print_progress((i + 1) / len(images))

    print('\nRotated {} images'.format(len(images)))
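
Example no. 34 depends on a rotate_image helper that is not shown. A plausible Pillow-based sketch (the output-file naming is an assumption):

from PIL import Image

def rotate_image(path, direction=None):
    # Hypothetical helper: save a rotated copy alongside the original image.
    img = Image.open(path)
    if direction == 'right':
        img.transpose(Image.ROTATE_270).save(path.replace('.jpg', '_right.jpg'))
    elif direction == 'left':
        img.transpose(Image.ROTATE_90).save(path.replace('.jpg', '_left.jpg'))
    else:
        img.transpose(Image.ROTATE_180).save(path.replace('.jpg', '_180.jpg'))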
Example no. 35
def clear_table(db, name_arr=[]):
    print('Cleaning Tables')
    c = db.cursor()
    total = len(name_arr)
    count = 0
    for n in name_arr:
        count += 1
        c.execute("DELETE FROM " + n)
        c.execute("ALTER TABLE " + n + " AUTO_INCREMENT = 1")
        print_progress(count, total)
    db.commit()
Example no. 36
def sequence_spacing(grof, xtcf, btime, etime, peptide_length, atom_sel):
    u = Universe(grof, xtcf)
    # this selection part should be better customized;
    # here, only backbone atoms are used, since u.selectAtoms doesn't
    # include hydrogen atoms
    # REMEMBER: argument verification should be done in main ONLY!

    # in MDAnalysis, resid starts from 1; in sequence_spacing.py, we don't
    # count the C- and N-termini, so the range runs from 2 to peptide_length + 2
    residues = [u.selectAtoms(atom_sel.format(i)) for i in range(2, peptide_length + 2)]
    ijdist_dict = {}
    for ts in u.trajectory:
        # btime, etime defaults to 0, if etime is 0, loop till the end of the
        # trajectory
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break

        # the good stuff
        for i, resi in enumerate(residues):
            for j, resj in enumerate(residues):
                # to remove duplicates, since resi and resj are within the same peptide
                if i < j:
                    dij = abs(i - j)
                    d_atomi_atomj = []
                    # loop through every atom in both residues
                    for atomi in resi:
                        for atomj in resj:
                            d_atomi_atomj.append(
                                np.linalg.norm(atomi.pos - atomj.pos))
                    # add the result to the dictionary
                    ij_dist = np.average(d_atomi_atomj)   # distance between i and j
                    if dij not in ijdist_dict.keys():
                        ijdist_dict[dij] = [ij_dist]
                    else:
                        ijdist_dict[dij].append(ij_dist)
        utils.print_progress(ts)

    return ijdist_dict
Example no. 37
if search_term:
    params["jql"] = params["jql"] + " AND text~'%s'" % search_term
if ticket_id:
    params["jql"] = params["jql"] + " AND issue='%s'" % ticket_id

params["maxResults"] = config._MAX_SEARCH_RESULTS
params["fields"] = ["summary",]

search_result = utils.get_request_auth(config._SEARCH, params)

if "issues" in search_result and len(search_result["issues"]) != 0:
    print("\nTicket found:")
    issues = search_result["issues"]
    for issue in issues:
        print("%s -> %s" % (issue["key"], issue["fields"]["summary"]))
        
    print("\nWould you liket to tag/label the above ticket(s) with '%s'?" % search_term)
    prompt = input("Proceed? [y/n] ")
    if prompt == "n":
        pass
    elif prompt == "y":
        for index, issue in enumerate(issues, start=1):
            payload = utils.add_label_payload(search_term)
            if utils.put_request_auth(config._ISSUE_BY_KEY % issue["key"], payload):
                utils.print_progress(index, len(issues))
else:
    print("\nNo tickets")

print("\n\nSee you next time!")
print("Have a good day!")