Code example #1
# Imports assumed by this snippet (not shown in the original excerpt)
import re

from pymongo import MongoClient
from progressbar import Bar, Percentage, ProgressBar

# Connect to MongoDB
connection = MongoClient("mongodb://localhost")
db = connection.bundestagswahl.tweets

# Set maximum tweets
maxtweets = 5000

# Temporary dictionaries
original_tweets = {'documents': []}
tidied_tweets = {'documents': []}

# Use a progress bar in your shell (for larger data sets).
pbar1 = ProgressBar(widgets=['Analyze Tweets: ',
                             Percentage(),
                             Bar()],
                    maxval=maxtweets).start()
pbar2 = ProgressBar(widgets=['Write Excel File: ',
                             Percentage(),
                             Bar()],
                    maxval=maxtweets).start()


def tidy_tweet(tweet):
    '''
    Helper function to tidy the tweet text.
    The regex removes special characters and links, etc.
    '''
    # The original snippet is truncated mid-call here; the replacement string
    # and the closing ".split())" below are an assumed completion.
    return ' '.join(
        re.sub(
            r"(@[A-Za-z0-9äöüÄÖÜß]+)|([^0-9A-Za-zäöüÄÖÜß \t])|(\w+:\/\/\S+)",
            " ", tweet).split())
Code example #2
def NodeDic(results, edge_info, node_info):
    '''
    Function takes the results of running a query, NETS edge label information, and a list of node information (list[0]
    contains the NETS nodes label triples, list[1] contains the NETS nodes identifier triples). The function returns a
    list of dictionaries where list[0] contains a nested dictionary where keys are bio entity identifiers and the
    values are the human readable labels and database identifiers; list[1] contains a dictionary where the bio node is
    the key and the value is a set of possible NETS node types for that node.
    :param results: json file containing the query results from the endpoint
    :param edge_info: dictionary where the keys are the NETS edges and the values are the edge labels
    :param node_info: a list of node information (list[0] contains the NETS nodes label triples, list[1] contains the
    NETS nodes identifier triples)
    :return: a list of dictionaries: list[0] contains a nested dictionary where keys are bio entity identifiers and the
    values are the human readable labels and database identifiers; list[1] contains a dictionary where the bio node is
    the key and the value is a set of possible NETS node types for that node
    '''

    print('Start building OWL-NETs metadata dictionary')

    # creates a map to store NETS node type information
    node_type = {}

    # creates a map to identify which query variables represent the BIO world ID, label, and ICE ID
    node_labeler = {}

    # assign variables needed for node dictionary
    NETS = set([x.strip('?') for y in edge_info[0].keys() for x in y])
    labels = [[
        re.sub('[?|"\n"]', '',
               x.split(' ')[0]),
        re.sub('[?|"\n"]', '',
               x.split(' ')[2])
    ] for x in node_info[0]]
    ids = [[x.split(' ')[0].strip('?'),
            x.split(' ')[2].strip('?')] for x in node_info[1]]

    # initialize progress bar
    widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')]
    pbar = ProgressBar(widgets=widgets, maxval=len(NETS))

    for node in pbar(NETS):
        node_labeler[node] = {}

        for res in results['results']['bindings']:
            node_key = str(res[node]['value'])
            label_value = str([x[1] for x in labels
                               if x[0] == node][0].encode('utf8'))
            id_value = str([x[0] for x in ids
                            if x[1] == node][0].encode('utf8'))

            # NODE TYPE: setting node type information
            if node_key in node_type.keys():
                node_type[node_key].add(node)

            else:
                node_type[node_key] = set()
                node_type[node_key].add(node)

            # NODE METADATA: setting node attributes by NETS node type
            if node_key in node_labeler[node].keys():
                # order matters - not using a set so that each ICE can be mapped to the label with the same index
                node_labeler[node][node_key]['label'].append(
                    res[label_value]['value'].encode('utf8'))
                node_labeler[node][node_key]['id'].append(
                    res[id_value]['value'].encode('utf8'))

            else:
                node_labeler[node][node_key] = {}
                node_labeler[node][node_key]['label'] = [
                    res[label_value]['value'].encode('utf8')
                ]
                node_labeler[node][node_key]['id'] = [
                    res[id_value]['value'].encode('utf8')
                ]

    # close progress bar
    pbar.finish()
    print('Finished building OWL-NETs metadata dictionary')
    print('\n')

    # CHECK: verify that the counts are correct
    for node in NETS:
        res_count = set()
        for res in results['results']['bindings']:
            res_count.add(res[node]['value'])

        if len(node_labeler[node].keys()) != len(
                res_count):  # verify the number of nodes in graph is correct
            raise ValueError('The count of results for the ' + str(node) +
                             ' NETS node in the node dictionary differ '
                             'from the query output')

    return node_labeler, node_type
Code example #3
    deadr_state = numpy.zeros((3, 1))
    control = numpy.zeros((1, 2))
    observation = numpy.zeros((len(RFID), 1))
    time = 0.0

    truth_trajectory = []
    deadr_trajectory = []
    est_trajectory = []

    fig = plt.figure(facecolor="w")
    #ax_arrow = fig.add_subplot(111, aspect='equal')
    ax_circle = fig.add_subplot(111, aspect='equal')
    ax_trajectory = fig.add_subplot(111, aspect='equal')

    num_of_loop = int(ENDTIME / DELTATIME)
    pbar = ProgressBar(widgets=[Percentage(), Bar()], max_value=num_of_loop)

    # Main loop
    for i in range(num_of_loop):
        # Calculation ground truth
        time = time + DELTATIME
        control = control_model(time)
        truth_state = process_model(x=truth_state,
                                    u=control,
                                    delta_time=DELTATIME)

        # Calculation dead reckoning
        deadr_state = process_model(
            x=deadr_state,
            u=(control + simulation_process_cov.dot(numpy.random.randn(2, 1))),
            delta_time=DELTATIME)
Code example #4
def get_progress_bar(maxval):
    widgets = [Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed()]
    return ProgressBar(widgets=widgets, maxval=maxval)
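
A minimal usage sketch for this helper (assuming the classic progressbar package, which provides the Percentage, Bar, ETA and FileTransferSpeed widgets used above; the 1000-step loop is purely illustrative):

from progressbar import Bar, ETA, FileTransferSpeed, Percentage, ProgressBar

pbar = get_progress_bar(1000).start()
for i in range(1000):
    # ... do one unit of work here ...
    pbar.update(i + 1)
pbar.finish()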
Code example #5
im = imread(measurementFiles[0])
#im = wiener(im)
dim = shape(im)

densityVals = -log(im[20:dim[0]-20:1,20:dim[1]-20:1,:])
dim = shape(densityVals)

pixelCalcWidth = 20

# use integer division so the array shapes and loop ranges stay integers
T = zeros([dim[0]//pixelCalcWidth, dim[1]//pixelCalcWidth])
D = zeros([dim[0]//pixelCalcWidth, dim[1]//pixelCalcWidth])
w = zeros([pixelCalcWidth, pixelCalcWidth, 3])
OD = zeros([pixelCalcWidth, pixelCalcWidth, 3])


widgets = ['Dose calc: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
           ' ', ETA(), ' ', FileTransferSpeed()]
pbar = ProgressBar(widgets=widgets, maxval=dim[0]//pixelCalcWidth).start()



for i in range(dim[0]//pixelCalcWidth):
	idist = i*pixelCalcWidth
	
	for j in range(dim[1]//pixelCalcWidth):
		jdist = j*pixelCalcWidth
		
		OD = densityVals[idist:idist+pixelCalcWidth, jdist:jdist+pixelCalcWidth, :]
		w[:,:,0] = 1/density2DoseSigma(OD[:,:,0],red,redSig)
		w[:,:,1] = 1/density2DoseSigma(OD[:,:,1],green,greenSig)
		w[:,:,2] = 1/density2DoseSigma(OD[:,:,2],blue,blueSig)
Code example #6
File: noisify.py  Project: wang-na/audioprocessing
def noisify(meta_filename, noise_filename, noise_name, noise_percent):
    is_noise, data_noise, sr_noise_orig = check_noise(noise_filename)
    if is_noise:
        sr_noise = sr_noise_orig
        # default to the unmodified noise; it is resampled in the loop below
        # only when a file's sample rate differs (otherwise the original
        # snippet would reference noise_data before assignment)
        noise_data = data_noise
        source_dir = os.path.dirname(meta_filename)
        wavs_dir = os.path.join(source_dir, 'wavs')
        out_dir = os.path.join(source_dir, 'wavs', noise_name)
        meta_outname = os.path.join(source_dir, 'metadata-{}.csv'.format(noise_name))
        meta_data = codecs.open(meta_filename, 'r', 'utf-8').readlines()
        num_entries = len(meta_data)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        widgets=[FormatLabel('File: %(message)s [Iter: %(value)s/'+str(num_entries)+']'), ' ', Percentage(), ' ', Bar(marker='@', left='[', right=']'), ' ', ETA()]
        pBar = ProgressBar(widgets=widgets, maxval=num_entries).start()  
        out_meta = []
        for i, line in enumerate(meta_data):
            filename, orig_text, clean_text = line.strip().split('|')
            pBar.update(i, filename)
            infile = os.path.join(wavs_dir, filename + '.wav')
            outfile = os.path.join(out_dir, filename + '.wav')
            data_audio, sr_audio = sf.read(infile, dtype='int16')
            if sr_audio != sr_noise:
                noise_data = resample_noise_file(sr_audio, sr_noise, data_noise)
                sr_noise = sr_audio
            create_noise(outfile, infile, data_audio, noise_data, sr_audio, noise_percent)
            out_meta.append(u'{}/{}|{}|{}'.format(noise_name, filename, orig_text, clean_text))
        pBar.finish()
        print('Saving new metadata... ', end='')
        sys.stdout.flush()
        outf = codecs.open(meta_outname, 'w', 'utf-8')
        for l in out_meta:
            print(l, file=outf)
        outf.close()
        print('done')
        print('Added meta-file: {}'.format(meta_outname))
    else:
        print('Invalid Noise data. Exiting!')
Code example #7
def main(source=None,
         num_epochs=None,
         method=None,
         batch_size=None,
         learning_rate=None,
         beta=None,
         image_dir=None,
         binary_dir=None,
         dim_z=None,
         prior=None):

    # DATA
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(
        source=source)
    train_samples = X_train.shape[0]

    # VAR
    noise_var = T.matrix('noise')
    input_var = T.tensor4('inputs')
    log_Z = theano.shared(lasagne.utils.floatX(0.), name='log_Z')

    # MODEL
    logger.info('Building model and graph')
    generator = build_generator(noise_var, dim_z=dim_z)
    discriminator = build_discriminator(input_var)

    # RNG
    trng = RandomStreams(random.randint(1, 1000000))

    # GRAPH / LOSS
    g_output_logit = lasagne.layers.get_output(generator)
    generator_loss, discriminator_loss, D_r, D_f, log_Z_est, log_w, w_tilde, d = BGAN(
        discriminator, g_output_logit, n_samples, trng)

    # OPTIMIZER
    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)

    eta = theano.shared(lasagne.utils.floatX(learning_rate))

    updates = lasagne.updates.adam(generator_loss,
                                   generator_params,
                                   learning_rate=eta,
                                   beta1=beta)
    updates.update(
        lasagne.updates.adam(discriminator_loss,
                             discriminator_params,
                             learning_rate=eta,
                             beta1=beta))
    updates.update([(log_Z, 0.95 * log_Z + 0.05 * log_Z_est.mean())])

    # COMPILE
    results = {
        'p(real)': (T.nnet.sigmoid(real_out) > .5).mean(),
        'p(fake)': (T.nnet.sigmoid(fake_out) < .5).mean(),
        'G loss': generator_loss,
        'D loss': discriminator_loss,
        'log Z': log_Z,
        'log Z est': log_Z_est.mean(),
        'log_Z est var': log_Z_est.std()**2,
        'log w': log_w.mean(),
        'log w var': log_w.std()**2,
        'norm w': w_tilde.mean(),
        'norm w var': w_tilde.std()**2,
        'ESS': (1. / (w_tilde**2).sum(0)).mean()
    }
    train_fn = theano.function([noise_var, input_var],
                               results,
                               updates=updates)

    gen_fn = theano.function([noise_var],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))

    # TRAIN
    logger.info('Training...')

    results = {}
    for epoch in range(num_epochs):
        u = 0
        prefix = '{}_{}'.format(method, epoch)

        e_results = {}
        widgets = ['Epoch {}, '.format(epoch), Timer(), Bar()]
        pbar = ProgressBar(widgets=widgets,
                           maxval=(train_samples // batch_size)).start()
        prefix = str(epoch)

        start_time = time.time()
        batch0 = None
        for batch in iterate_minibatches(X_train,
                                         y_train,
                                         batch_size,
                                         shuffle=True):
            inputs, targets = batch
            if batch0 is None: batch0 = inputs

            if prior == 'uniform':
                noise = floatX(np.random.rand(len(inputs), dim_z))
            elif prior == 'gaussian':
                noise = floatX(numpy.random.normal(size=(len(inputs), dim_z)))

            outs = train_fn(noise, inputs)
            outs = dict((k, np.asarray(v)) for k, v in outs.items())

            update_dict_of_lists(e_results, **outs)
            u += 1
            pbar.update(u)

        update_dict_of_lists(results, **e_results)
        np.savez(path.join(binary_dir, '{}_results.npz'.format(prefix)),
                 **results)

        try:
            if prior == 'uniform':
                noise = floatX(np.random.rand(100, dim_z))
            elif prior == 'gaussian':
                noise = floatX(numpy.random.normal(size=(64, dim_z)))
            samples = gen_fn(noise)
            summarize(results, samples, image_dir=image_dir, prefix=prefix)
        except Exception as e:
            print(e)
            pass

        logger.info('Epoch {} of {} took {:.3f}s'.format(
            epoch + 1, num_epochs,
            time.time() - start_time))

        np.savez(
            path.join(binary_dir, '{}_generator_params.npz'.format(prefix)),
            *lasagne.layers.get_all_param_values(generator))
        np.savez(
            path.join(binary_dir,
                      '{}_discriminator_params.npz'.format(prefix)),
            *lasagne.layers.get_all_param_values(discriminator))
Code example #8
File: viz.py  Project: akturner/MPAS-Tools
def setup_time_indices(fn_pattern, xtimeName):  # {{{
    """
    This function finds a list of NetCDF files containing time-dependent
    MPAS data and extracts the time indices in each file.  The routine
    ensures that each time is unique.
    """
    # Build file list and time indices
    if ';' in fn_pattern:
        file_list = []
        for pattern in fn_pattern.split(';'):
            file_list.extend(glob.glob(pattern))
    else:
        file_list = glob.glob(fn_pattern)
    file_list.sort()

    local_indices = []
    file_names = []
    all_times = []

    if len(file_list) == 0:
        print("No files to process.")
        print("Exiting...")
        sys.exit(0)

    if use_progress_bar:
        widgets = [
            'Build time indices: ',
            Percentage(), ' ',
            Bar(), ' ',
            ETA()
        ]
        time_bar = ProgressBar(widgets=widgets, maxval=len(file_list)).start()
    else:
        print("Build time indices...")

    i_file = 0
    allTIndex = 0
    for file_name in file_list:
        try:
            nc_file = open_netcdf(file_name)
        except IOError:
            print("Warning: could not open {}".format(file_name))
            continue

        if 'Time' not in nc_file.dimensions or xtimeName is None:
            local_times = ['0']
        else:
            local_times = []
            if xtimeName == 'none':
                # no xtime variable so just use integers converted to strings
                for index in range(len(nc_file.dimensions['Time'])):
                    local_times.append(allTIndex)
                    allTIndex += 1
            else:
                if xtimeName not in nc_file.variables:
                    raise ValueError("xtime variable name {} not found in "
                                     "{}".format(xtimeName, file_name))
                xtime = nc_file.variables[xtimeName]
                if len(xtime.shape) == 2:
                    xtime = xtime[:, :]
                    for index in range(xtime.shape[0]):
                        local_times.append(xtime[index, :].tostring())
                else:
                    local_times = xtime[:]

                if (len(local_times) == 0):
                    local_times = ['0']

        nTime = len(local_times)

        for time_idx in range(nTime):
            if local_times[time_idx] not in all_times:
                local_indices.append(time_idx)
                file_names.append(file_name)
                all_times.append(local_times[time_idx])

        i_file = i_file + 1
        nc_file.close()
        if use_progress_bar:
            time_bar.update(i_file)

    if use_progress_bar:
        time_bar.finish()

    return (local_indices, file_names)  # }}}
Code example #9
File: viz.py  Project: akturner/MPAS-Tools
def build_topo_point_and_polygon_lists(nc_file, output_32bit, lonlat):  # {{{

    if output_32bit:
        dtype = 'f4'
    else:
        dtype = 'f8'

    xVertex, yVertex, zVertex = \
        _build_location_list_xyz(nc_file, 'Vertex', output_32bit, lonlat)

    nCells = len(nc_file.dimensions['nCells'])
    nEdges = len(nc_file.dimensions['nEdges'])
    maxEdges = len(nc_file.dimensions['maxEdges'])

    nEdgesOnCell = nc_file.variables['nEdgesOnCell'][:]
    verticesOnCell = nc_file.variables['verticesOnCell'][:, :] - 1
    edgesOnCell = nc_file.variables['edgesOnCell'][:, :] - 1
    verticesOnEdge = nc_file.variables['verticesOnEdge'][:] - 1
    cellsOnEdge = nc_file.variables['cellsOnEdge'][:] - 1

    # 4 points for each edge face
    nPoints = 4 * nEdges
    # 1 polygon for each edge and cell
    nPolygons = nEdges + nCells

    X = numpy.zeros(nPoints, dtype)
    Y = numpy.zeros(nPoints, dtype)
    Z = numpy.zeros(nPoints, dtype)

    outIndex = 0

    # The points on an edge are vertex 0, 1, 1, 0 on that edge, making a
    # vertical rectangle if the points are offset
    iEdges, voe = numpy.meshgrid(numpy.arange(nEdges), [0, 1, 1, 0],
                                 indexing='ij')
    iVerts = verticesOnEdge[iEdges, voe].ravel()
    X[:] = xVertex[iVerts]
    Y[:] = yVertex[iVerts]
    Z[:] = zVertex[iVerts]
    vertices = (X, Y, Z)

    verticesOnPolygon = -1 * numpy.ones((nPolygons, maxEdges), int)
    verticesOnPolygon[0:nEdges, 0:4] = \
        numpy.arange(4*nEdges).reshape(nEdges, 4)

    # Build cells
    if use_progress_bar:
        widgets = [
            'Build cell connectivity: ',
            Percentage(), ' ',
            Bar(), ' ',
            ETA()
        ]
        bar = ProgressBar(widgets=widgets, maxval=nCells).start()
    else:
        print("Build cell connectivity...")

    outIndex = nEdges

    for iCell in range(nCells):
        neoc = nEdgesOnCell[iCell]
        eocs = edgesOnCell[iCell, 0:neoc]
        vocs = verticesOnCell[iCell, 0:neoc]
        for index in range(neoc):
            iVert = vocs[index]
            iEdge = eocs[index]
            # which vertex on the edge corresponds to iVert?
            coes = cellsOnEdge[iEdge, :]
            voes = verticesOnEdge[iEdge, :]

            if coes[0] == iCell:
                if voes[0] == iVert:
                    voe = 0
                else:
                    voe = 1
            else:
                if voes[0] == iVert:
                    voe = 3
                else:
                    voe = 2

            verticesOnPolygon[nEdges + iCell, index] = 4 * iEdge + voe

        outIndex += neoc

        if use_progress_bar:
            bar.update(iCell)

    if use_progress_bar:
        bar.finish()

    validVerts = verticesOnPolygon >= 0

    if lonlat:
        lonEdge = numpy.rad2deg(nc_file.variables['lonEdge'][:])
        latEdge = numpy.rad2deg(nc_file.variables['latEdge'][:])
        lonCell = numpy.rad2deg(nc_file.variables['lonCell'][:])
        latCell = numpy.rad2deg(nc_file.variables['latCell'][:])
        lonPolygon = numpy.append(lonEdge, lonCell)
        latPolygon = numpy.append(latEdge, latCell)

        vertices, verticesOnPolygon = _fix_lon_lat_vertices(
            vertices, verticesOnPolygon, validVerts, lonPolygon)

    if nc_file.on_a_sphere.strip() == 'NO' and \
            nc_file.is_periodic.strip() == 'YES':
        if lonlat:
            xcoord = lonPolygon
            ycoord = latPolygon
        else:
            xEdge = numpy.rad2deg(nc_file.variables['xEdge'][:])
            yEdge = numpy.rad2deg(nc_file.variables['yEdge'][:])
            xCell = numpy.rad2deg(nc_file.variables['xCell'][:])
            yCell = numpy.rad2deg(nc_file.variables['yCell'][:])
            xcoord = numpy.append(xEdge, xCell)
            ycoord = numpy.append(yEdge, yCell)

        vertices, verticesOnPolygon = _fix_periodic_vertices(
            vertices, verticesOnPolygon, validVerts, xcoord, ycoord,
            nc_file.x_period, nc_file.y_period)

    nPoints = len(vertices[0])

    # we want to know the cells corresponding to each point.  The first two
    # points correspond to the first cell, the second two to the second cell
    # (if any).
    cell_to_point_map = -1 * numpy.ones((nPoints), int)
    boundary_mask = numpy.zeros((nPoints), bool)

    # first cell on edge always exists
    coe = cellsOnEdge[:, 0].copy()
    for index in range(2):
        voe = verticesOnPolygon[0:nEdges, index]
        cell_to_point_map[voe] = coe
        boundary_mask[voe] = False

    # second cell on edge may not exist
    coe = cellsOnEdge[:, 1].copy()
    mask = coe == -1
    # use the first cell if the second doesn't exist
    coe[mask] = cellsOnEdge[:, 0][mask]
    for index in range(2, 4):
        voe = verticesOnPolygon[0:nEdges, index]
        cell_to_point_map[voe] = coe
        boundary_mask[voe] = mask

    # for good measure, make sure vertices on cell are also accounted for
    for index in range(maxEdges):
        iCells = numpy.arange(nCells)
        voc = verticesOnPolygon[nEdges:nEdges + nCells, index]
        mask = index < nEdgesOnCell
        cell_to_point_map[voc[mask]] = iCells[mask]
        boundary_mask[voc[mask]] = False

    connectivity = verticesOnPolygon[validVerts]
    validCount = numpy.sum(numpy.array(validVerts, int), axis=1)
    offsets = numpy.cumsum(validCount, dtype=int)
    valid_mask = numpy.ones(nCells, bool)

    return vertices, connectivity, offsets, valid_mask, \
        cell_to_point_map, boundary_mask.ravel()  # }}}
Code example #10
def main():

    global args
    args = parser.parse_args()

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    task_name = args.task_name

    epoch_size = args.epoch_size
    batch_size = args.batch_size

    result_path = os.path.join(args.result_path, args.task_name)
    if args.style_A:
        result_path = os.path.join(result_path, args.style_A)
    result_path = os.path.join(result_path, args.model_arch)

    model_path = os.path.join(args.model_path, args.task_name)
    if args.style_A:
        model_path = os.path.join(model_path, args.style_A)
    model_path = os.path.join(model_path, args.model_arch)

    data_style_A, data_style_B, test_style_A, test_style_B = get_data()

    if args.task_name.startswith('edges2'):
        test_A = read_images(test_style_A, 'A', args.image_size)
        test_B = read_images(test_style_B, 'B', args.image_size)

    elif args.task_name == 'handbags2shoes' or args.task_name == 'shoes2handbags' or args.task_name == 'tshirts2watches' or args.task_name == 'watches2tshirts':
        test_A = read_images(test_style_A, 'B', args.image_size)
        test_B = read_images(test_style_B, 'B', args.image_size)

    else:
        test_A = read_images(test_style_A, None, args.image_size)
        test_B = read_images(test_style_B, None, args.image_size)

    test_A = Variable(torch.FloatTensor(test_A), volatile=True)
    test_B = Variable(torch.FloatTensor(test_B), volatile=True)

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    generator_A = Generator()
    generator_B = Generator()
    discriminator_A = Discriminator()
    discriminator_B = Discriminator()

    if cuda:
        test_A = test_A.cuda()
        test_B = test_B.cuda()
        generator_A = generator_A.cuda()
        generator_B = generator_B.cuda()
        discriminator_A = discriminator_A.cuda()
        discriminator_B = discriminator_B.cuda()

    data_size = min(len(data_style_A), len(data_style_B))
    n_batches = (data_size // batch_size)

    recon_criterion = nn.MSELoss()
    gan_criterion = nn.BCELoss()
    feat_criterion = nn.HingeEmbeddingLoss()

    gen_params = chain(generator_A.parameters(), generator_B.parameters())
    dis_params = chain(discriminator_A.parameters(),
                       discriminator_B.parameters())

    optim_gen = optim.Adam(gen_params,
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)
    optim_dis = optim.Adam(dis_params,
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)

    iters = 0

    gen_loss_total = []
    dis_loss_total = []

    for epoch in range(epoch_size):
        data_style_A, data_style_B = shuffle_data(data_style_A, data_style_B)

        widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=n_batches, widgets=widgets)
        pbar.start()

        for i in range(n_batches):

            pbar.update(i)

            generator_A.zero_grad()
            generator_B.zero_grad()
            discriminator_A.zero_grad()
            discriminator_B.zero_grad()

            A_path = data_style_A[i * batch_size:(i + 1) * batch_size]
            B_path = data_style_B[i * batch_size:(i + 1) * batch_size]

            if args.task_name.startswith('edges2'):
                A = read_images(A_path, 'A', args.image_size)
                B = read_images(B_path, 'B', args.image_size)
            elif args.task_name == 'handbags2shoes' or args.task_name == 'shoes2handbags' or args.task_name == 'tshirts2watches' or args.task_name == 'watches2tshirts':
                A = read_images(A_path, 'B', args.image_size)
                B = read_images(B_path, 'B', args.image_size)
            else:
                A = read_images(A_path, None, args.image_size)
                B = read_images(B_path, None, args.image_size)

            A = Variable(torch.FloatTensor(A))
            B = Variable(torch.FloatTensor(B))

            if cuda:
                A = A.cuda()
                B = B.cuda()

            AB = generator_B(A)
            BA = generator_A(B)

            ABA = generator_A(AB)
            BAB = generator_B(BA)

            # Reconstruction Loss
            recon_loss_A = recon_criterion(ABA, A)
            recon_loss_B = recon_criterion(BAB, B)

            # Real/Fake GAN Loss (A)
            A_dis_real, A_feats_real = discriminator_A(A)
            A_dis_fake, A_feats_fake = discriminator_A(BA)

            dis_loss_A, gen_loss_A = get_gan_loss(A_dis_real, A_dis_fake,
                                                  gan_criterion, cuda)
            fm_loss_A = get_fm_loss(A_feats_real, A_feats_fake, feat_criterion)

            # Real/Fake GAN Loss (B)
            B_dis_real, B_feats_real = discriminator_B(B)
            B_dis_fake, B_feats_fake = discriminator_B(AB)

            dis_loss_B, gen_loss_B = get_gan_loss(B_dis_real, B_dis_fake,
                                                  gan_criterion, cuda)
            fm_loss_B = get_fm_loss(B_feats_real, B_feats_fake, feat_criterion)

            # Total Loss

            if iters < args.gan_curriculum:
                rate = args.starting_rate
            else:
                rate = args.default_rate

            gen_loss_A_total = (gen_loss_B * 0.1 + fm_loss_B * 0.9) * (
                1. - rate) + recon_loss_A * rate
            gen_loss_B_total = (gen_loss_A * 0.1 + fm_loss_A * 0.9) * (
                1. - rate) + recon_loss_B * rate

            if args.model_arch == 'discogan':
                gen_loss = gen_loss_A_total + gen_loss_B_total
                dis_loss = dis_loss_A + dis_loss_B
            elif args.model_arch == 'recongan':
                gen_loss = gen_loss_A_total
                dis_loss = dis_loss_B
            elif args.model_arch == 'gan':
                gen_loss = (gen_loss_B * 0.1 + fm_loss_B * 0.9)
                dis_loss = dis_loss_B

            if iters % args.update_interval == 0:
                dis_loss.backward()
                optim_dis.step()
            else:
                gen_loss.backward()
                optim_gen.step()

            if iters % args.log_interval == 0:
                print("---------------------")
                print("GEN Loss:", as_np(gen_loss_A.mean()),
                      as_np(gen_loss_B.mean()))
                print("Feature Matching Loss:", as_np(fm_loss_A.mean()),
                      as_np(fm_loss_B.mean()))
                print("RECON Loss:", as_np(recon_loss_A.mean()),
                      as_np(recon_loss_B.mean()))
                print("DIS Loss:", as_np(dis_loss_A.mean()),
                      as_np(dis_loss_B.mean()))

            if iters % args.image_save_interval == 0:
                AB = generator_B(test_A)
                BA = generator_A(test_B)
                ABA = generator_A(AB)
                BAB = generator_B(BA)

                n_testset = min(test_A.size()[0], test_B.size()[0])

                subdir_path = os.path.join(
                    result_path, str(iters / args.image_save_interval))

                if os.path.exists(subdir_path):
                    pass
                else:
                    os.makedirs(subdir_path)

                for im_idx in range(n_testset):
                    A_val = test_A[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    B_val = test_B[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    BA_val = BA[im_idx].cpu().data.numpy().transpose(1, 2,
                                                                     0) * 255.
                    ABA_val = ABA[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.
                    AB_val = AB[im_idx].cpu().data.numpy().transpose(1, 2,
                                                                     0) * 255.
                    BAB_val = BAB[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) * 255.

                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '.A.jpg',
                                      A_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.B.jpg',
                                      B_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.BA.jpg',
                                      BA_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.AB.jpg',
                                      AB_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.ABA.jpg',
                                      ABA_val.astype(np.uint8)[:, :, ::-1])
                    scipy.misc.imsave(filename_prefix + '.BAB.jpg',
                                      BAB_val.astype(np.uint8)[:, :, ::-1])

            if iters % args.model_save_interval == 0:
                torch.save(
                    generator_A,
                    os.path.join(
                        model_path, 'model_gen_A-' +
                        str(iters / args.model_save_interval)))
                torch.save(
                    generator_B,
                    os.path.join(
                        model_path, 'model_gen_B-' +
                        str(iters / args.model_save_interval)))
                torch.save(
                    discriminator_A,
                    os.path.join(
                        model_path, 'model_dis_A-' +
                        str(iters / args.model_save_interval)))
                torch.save(
                    discriminator_B,
                    os.path.join(
                        model_path, 'model_dis_B-' +
                        str(iters / args.model_save_interval)))

            iters += 1
Code example #11
with tf.variable_scope("model") as scope:
    optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer, use_prettytensor=True)

with tf.variable_scope("model", reuse=True) as scope:
    p_rep = tf.sigmoid(model.sample_prior(N_MINIBATCH))

init = tf.initialize_all_variables()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
    avg_loss = 0.0

    widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
    pbar = ProgressBar(n_iter_per_epoch, widgets=widgets)
    pbar.start()
    for t in range(n_iter_per_epoch):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(N_MINIBATCH)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']

    # Take average over all ELBOs during the epoch, and over minibatch
    # of data points (images).
    avg_loss = avg_loss / n_iter_per_epoch
    avg_loss = avg_loss / N_MINIBATCH

    # Print a lower bound to the average marginal likelihood for an
    # image. (The snippet is truncated here; the line below is an assumed
    # completion, since the averaged loss is the negative of that bound.)
    print("log p(x) >= {:0.3f}".format(-avg_loss))
Code example #12
def generate_subtitles(source_path,
                       *,
                       concurrency=DEFAULT_CONCURRENCY,
                       src_language=DEFAULT_SRC_LANGUAGE,
                       subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
                       output=None,
                       verbose=False) -> str:
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []
    if regions:
        widgets = [
            'Converting speech regions to FLAC files: ',
            Percentage(), ' ',
            Bar(), ' ',
            ETA()
        ]
        p_bar = OptionalProgressBar(verbose=verbose,
                                    widgets=widgets,
                                    maxval=len(regions))

        try:
            p_bar.start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter,
                                                           regions)):
                extracted_regions.append(extracted_region)
                p_bar.update(i)
            p_bar.finish()

            widgets = [
                'Performing speech recognition: ',
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            p_bar = OptionalProgressBar(verbose=verbose,
                                        widgets=widgets,
                                        maxval=len(regions)).start()

            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                p_bar.update(i)
            p_bar.finish()
        except KeyboardInterrupt:
            p_bar.finish()
            pool.terminate()
            pool.join()
            print('Cancelling transcription')
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]

    formatter: BaseFormatter = FORMATTERS.get(subtitle_file_format)()
    formatted_subtitles = formatter.generate(timed_subtitles)

    with smart_open(output) as f:
        f.write(formatted_subtitles)

    os.remove(audio_filename)

    if output:
        print('Subtitles file created at {subtitle_file_path}'.format(
            subtitle_file_path=output))

    return formatted_subtitles
Code example #13
    def process(self, filename, cols):
        Rat.frame_loc = 0
        (fname_name, ext) = os.path.splitext(filename)
        left_right = fname_name[-1]
        self.read_data(filename, cols=cols)
        #        data_p1 = self.data[:, 1]
        data_p5 = self.df.loc[:, 'nonzero_p5']
        total_frames = len(data_p5)
        # each sample represents 5 frames (5 sec)
        moving_win = self.windowed_view(data_p5, 10, 5)
        win_mean = np.mean(moving_win, axis=1)
        label = (win_mean > Rat.thMin) & (win_mean < Rat.thMax)
        label[:Rat.jump_rows] = False
        print('win_mean.shape %d' % win_mean.shape)
        print('nonzero of label %d' % np.count_nonzero(label))

        label_win = self.windowed_view(label, 5, 4)
        sum_label = np.sum(label_win, axis=1)
        labelLick = (sum_label == 5)
        #        labelLick = sum_label
        labelLick1 = labelLick.copy()
        for i in range(labelLick.size):
            if labelLick[i] == True:
                labelLick1[i:i + 6] = True

        for i in range(6, labelLick1.size):
            if labelLick1[i] == False:
                labelLick1[i - 6:i] = False

#        labelLick_file_name = '{}/_labelLick_{}.csv'.format(str(self.video_dir), left_right)
#        labelLick.tofile(labelLick_file_name, sep='\n')
#
#        labelLick_file_name = '{}/_labelLick1_{}.csv'.format(str(self.video_dir), left_right)
#        labelLick1.tofile(labelLick_file_name, sep='\n')
#        label_file_name = '{}/_label_{}.csv'.format(str(self.video_dir), left_right)
#        label.tofile(label_file_name, sep='\n')

        print('label_win.shape ', label_win.shape)
        print('labelLick.shape ', labelLick.shape)
        print('sum of labelLick %d, size %d' %
              (np.sum(labelLick), labelLick.size))
        #plt.plot(label)
        #plt.show()
        #        (head_path, vname) = os.path.split(str(self.video_dir))
        video_file = '{}/{}.avi.mkv'.format(str(self.video_dir),
                                            self.out_dir.name)
        #        print ('Rat::video_file ', video_file)
        cap = cv2.VideoCapture(video_file)
        bOpenVideo = cap.isOpened()
        if bOpenVideo == False:
            print('Open Video failed')
            return

        Rat.fps = cap.get(cv2.CAP_PROP_FPS)
        Rat.width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        Rat.height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        fps_out = Rat.fps // 2
        freqStr = '{0:d}L'.format(int(1000 / fps_out))

        print('fps = %d, w %d, h %d, total_frames %d, freq_out %s' %
              (Rat.fps, Rat.width, Rat.height, total_frames, freqStr))

        print('min duration %d, diff thresh [%.4f %.4f]' %
              (Rat.th_min_duration, Rat.thMin, Rat.thMax))

        bVideoWR = False
        extract_clips = 0

        widgets = [Percentage(), Bar()]
        pbar = ProgressBar(widgets=widgets, maxval=labelLick1.size).start()

        for i in range(5, labelLick1.size):
            frameCounter = i * 5

            if labelLick1[i] == True:
                #print(win_mean[i])
                if bVideoWR == False:
                    start_frame = frameCounter
                    end_frame = frameCounter
                    bVideoWR = True
                else:
                    if frameCounter < total_frames:
                        end_frame = frameCounter
                    else:
                        end_frame = total_frames

            else:
                if bVideoWR == True:
                    bVideoWR = False
                    if end_frame - start_frame > Rat.th_min_duration:
                        self.write_features(start_frame, end_frame, fps_out,
                                            left_right)
                        self.write_video(cap, start_frame, end_frame, fps_out,
                                         left_right)
                        extract_clips += 1

            pbar.update(i)

        pbar.finish()
        print('extract_clips: ', extract_clips)
Code example #14
def generate_subtitles(
    source_path,
    output=None,
    concurrency=DEFAULT_CONCURRENCY,
    src_language=DEFAULT_SRC_LANGUAGE,
    dst_language=DEFAULT_DST_LANGUAGE,
    subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
    api_key=None,
):
    audio_filename, audio_rate = extract_audio(source_path)

    regions = find_speech_regions(audio_filename)

    multiprocessing.freeze_support()
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter,
                                                           regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ',
                Bar(), ' ',
                ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()

            if not is_same_language(src_language, dst_language):
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language,
                                            google_translate_api_key,
                                            dst=dst_language,
                                            src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(
                            pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    print(
                        "Error: Subtitle translation requires specified Google Translate API key. "
                        "See --help for further information.")
                    return 1

        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output

    if not dest:
        base, ext = os.path.splitext(source_path)
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    os.remove(audio_filename)

    return dest
Code example #15
File: preprocess.py  Project: B3arAndW0lf/dotaPredict
NUM_HEROES = 108
NUM_FEATURES = NUM_HEROES * 2

# Our training label vector, Y, is a bit vector indicating
# whether radiant won (1) or lost (-1)
NUM_MATCHES = matches.count()

# Initialize training matrix
X = np.zeros((NUM_MATCHES, NUM_FEATURES), dtype=np.int8)


# Initialize training label vector
Y = np.zeros(NUM_MATCHES, dtype=np.int8)

widgets = [FormatLabel('Processed: %(value)d/%(max)d matches. '), ETA(), ' ', Percentage(), ' ', Bar()]
pbar = ProgressBar(widgets = widgets, maxval = NUM_MATCHES).start()

for i, record in enumerate(matches.find()):
    Y[i] = 1 if record['radiant_win'] else -1
    players = record['players']
    for player in players:
        hero_id = player['hero_id'] - 1

        # If the left-most bit of player_slot is set,
        # this player is on dire, so push the index accordingly
        player_slot = player['player_slot']
        if player_slot >= 128:
            hero_id += NUM_HEROES

        X[i, hero_id] = 1
Code example #16
def createprogress(count):
    """Return progress Bar"""
    widgets = [Percentage(), ' ', Bar(), ' ', AdaptiveETA()]
    pbar = ProgressBar(widgets=widgets, maxval=count)
    pbar.start()
    return pbar
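
A brief usage sketch for createprogress (the file list and the per-item work are hypothetical; AdaptiveETA, Percentage and Bar come from the progressbar package, as in the function above):

files = ['a.dat', 'b.dat', 'c.dat']  # hypothetical inputs
pbar = createprogress(len(files))
for i, name in enumerate(files):
    # ... process each file here ...
    pbar.update(i + 1)
pbar.finish()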
Code example #17
    def modify_one_by_one_function(self, name):
        """Apply a function (local search, mutation) to all chromosomes."""
        start = time.time()
        if self.progress_bar:
            print("{}:".format(name))

        if name == "Local search":
            current_function = self.memetic_function
        elif name == "Mutation":
            current_function = self.mutation_function
            name = name[:8] + ' ' + self.config["mutation_type"]
        else:
            raise NameError("Bad type of function.")

        if self.iteration > 1:
            if name in self.logs[-2].keys():
                if self.logs[-2][name]["step_time"] < 4:
                    self.progress_bar = False
                else:
                    self.progress_bar = True

        if self.fitness_function.name in ["fully connected", "convnet"]:
            self.progress_bar = False

        if self.pool:
            p = Pool(self.pool_size)
            manager = Manager()
            lock = manager.Lock()
            counter = manager.Value('i', 0)
            if self.progress_bar:
                pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], term_width=60, maxval=len(self.population)).start()
            else:
                pbar = None

            def pool_function(inside_lock, inside_counter, inside_member):
                inside_lock.acquire()
                inside_counter.value += 1
                inside_lock.release()

                inside_member.apply_on_chromosome(current_function, gpu=inside_counter.value % 4)

                inside_lock.acquire()
                if pbar:
                    pbar.update(inside_counter.value)
                inside_lock.release()

                return inside_member

            func = partial(pool_function, lock, counter)
            first = 1 if self.elitism and name == "Mutation" else 0

            members = p.map(func, self.population[first:])

            if self.elitism and name == "Mutation":
                members.append(self.population[0])

            self.population.current_population = members
            p.terminate()
        else:
            if self.progress_bar:
                pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()], term_width=60, maxval=len(self.population)).start()
            ignor_first = self.elitism and name == "Mutation"

            for i, member in enumerate(self.population):
                if self.progress_bar:
                    pbar.update(i + 1)
                if not ignor_first:
                    member.apply_on_chromosome(current_function)
                ignor_first = False

        if self.progress_bar:
            pbar.finish()

        step_time = time.time() - start

        if step_time < 120:
            print('{0} time: {1:.2f}s\n'.format(name, step_time))
        else:
            print('{0} time: {1:.2f}min\n'.format(name, step_time // 60))

        return step_time, name
Code example #18
def example0():
    pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=300).start()
    for i in range(300):
        time.sleep(0.01)
        pbar.update(i + 1)
    pbar.finish()
Code example #19
 sess.run(tf.global_variables_initializer())
 add_num = 0
 if os.path.exists('../logs_lm/checkpoint'):
     print('loading language model...')
     latest = tf.train.latest_checkpoint('logs_lm')
     add_num = int(latest.split('_')[-1])
     saver.restore(sess, latest)
 writer = tf.summary.FileWriter('../logs_lm/tensorboard',
                                tf.get_default_graph())
 for k in range(epochs):
     total_loss = 0
     batch = train_data.get_lm_batch()
     widgets = [
         'this is the ' + str(k + 1) + 'th epoch of training !!!',
         Percentage(), ' ',
         Bar(), ' ',
         ETA()
     ]
     pbar = ProgressBar(widgets=widgets, maxval=batch_num).start()
     for i in range(batch_num):
         input_batch, label_batch = next(batch)
         feed = {lm.x: input_batch, lm.y: label_batch}
         cost, _ = sess.run([lm.mean_loss, lm.train_op], feed_dict=feed)
         total_loss += cost
         if (k * batch_num + i) % 10 == 0:
             rs = sess.run(merged, feed_dict=feed)
             writer.add_summary(rs, k * batch_num + i)
         pbar.update(i)
     pbar.finish()
     print('epochs', k + 1, ': average loss = ', total_loss / batch_num)
     saver.save(sess, '../logs_lm/model_%d' % (epochs + add_num))
Code example #20
def main():

    if TEST:
        sys.stdout.write('Run in TEST mode! \n')

    args = sys.argv[1:]
    if len(args) < 2:
        return usage()

    infile = args[0]
    outfile = args[1]
    check_outfile_path(outfile)

    fin = ROOT.TFile(infile)
    t = fin.Get('tree')
    entries = t.GetEntriesFast()

    pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=entries).start()
    time_start = time()

    #    fout = ROOT.TFile(outfile, "RECREATE")
    #    t_out = ROOT.TTree('signal', 'signal')
    #    mystruct = ROOT.MyTreeStruct()
    ##    mystruct2 = ROOT.MyTreeStruct2()
    #    t_out.Branch('vtx_mrecpipi', mystruct, 'vtx_mrecpipi/D')

    #    t_out.Branch('indexmc', mystruct2, 'indexmc/D')
    #    t_out.Branch('pdgid', mystruct, 'm_pdgid[100]/I')
    #    t_out.Branch('trkidx', mystruct, 'm_trkidx[100]/I')
    #    t_out.Branch('motherpid', mystruct, 'm_motherpid[100]/I')
    #    t_out.Branch('motheridx', mystruct, 'm_motheridx[100]/I')

    for jentry in range(entries):
        pbar.update(jentry + 1)
        # get the next tree in the chain and verify
        ientry = t.LoadTree(jentry)
        if ientry < 0:
            break
        # copy next entry into memory and verify

        if TEST and ientry > 10000:
            break

        nb = t.GetEntry(jentry)
        if nb <= 0:
            continue

        if NonPiPiJpsi:  # Non-PiPiJpsi
            if not (check_pipiJpsi(t)):
                fill_histograms_all_combination(t)
        else:  # Normal
            fill_histograms_all_combination(t)

    fout = ROOT.TFile(outfile, "RECREATE")
    #   t_out.Write()
    write_histograms()
    fout.Close()
    pbar.finish()

    dur = duration(time() - time_start)
    sys.stdout.write(' \nDone in %s. \n' % dur)
Code example #21
def check_facebookcontact(elementtype, xml_root):
    global LIMIT_COUNTER, ARGS
    list_suffix2kill = [
        'about',
        'community',
        'info',
        'posts',
        'reviews',
        'services',
        'timeline',
    ]
    list_subdomains2kill = [
        'm', 'web', 'b-m', 'da-dk', 'de-de', 'el-gr', 'en-gb', 'es-es',
        'fr-fr', 'he-il', 'hr-hr', 'is-is', 'it-it', 'nl-nl', 'pl-pl', 'si-si'
    ]
    bar_max = len(xml_root.findall(elementtype))
    if int(ARGS.limit) > 0 and int(
            ARGS.limit) < len(xml_root.findall('relation')) + len(
                xml_root.findall('way')) + len(xml_root.findall('node')):
        bar_max = int(ARGS.limit)

    widgets = [Percentage(), ' ', Bar(), ' ', ETA(), ' ', AdaptiveETA()]
    p_bar = progressbar.ProgressBar(widgets=widgets, maxval=bar_max)
    p_bar.start()
    for element in xml_root.findall(elementtype):
        element_changed = False

        # save file and exit if limit is reached
        if int(ARGS.limit) > 0 and LIMIT_COUNTER >= int(ARGS.limit):
            with open(ARGS.export, 'w') as f:
                f.write(ET.tostring(xml_root, encoding='utf8').decode('utf8'))
            break

        # iterate through elements
        for tag in element.findall('tag'):
            initial_url = ''

            # if we have found a website-tag with facebook in the url we move it to the contact:facebook-tag
            if tag.attrib['k'] == 'website' and tag.attrib['v'].find(
                    'facebook') >= 0:
                tag.attrib['k'] = 'contact:facebook'
                element_changed = True

            if tag.attrib['k'] == 'contact:facebook':
                # save initial url-value to variable: initial_url
                initial_url = tag.attrib['v']
                # load initial url into variable for substitute-url
                sub_url = initial_url

                # apply-subdomain-replacement
                for subdomain in list_subdomains2kill:
                    sub_url = sub_url.replace(subdomain + '.facebook.com',
                                              'www.facebook.com')

                # fix: urls starting with //
                sub_url = re.sub(r"^\/\/(www\.|)facebook(\.com|\.de|\.pl)\/",
                                 "https://www.facebook.com/", sub_url, 0)
                # fix: urls starting with www.facebook* or facebook*
                sub_url = re.sub(r"^(www\.|)facebook(\.com|\.de|\.pl)\/",
                                 "https://www.facebook.com/", sub_url, 0)
                # fix: replace http(s)://facebook* by https://www.facebook.com
                sub_url = re.sub(r"http(|s)\:\/\/facebook(\.com|\.de|\.pl)\/",
                                 "https://www.facebook.com/", sub_url, 0)

                # fix: cut the category-part out of the url
                sub_url = re.sub(
                    r"^https\:\/\/(www\.|)facebook\.com\/pages\/category\/[0-9a-zA-Z-]+\/",
                    "https://www.facebook.com/pages/", sub_url, 0)

                # test: if url is a redirection find its final target and use this as substitute-url
                if requests.get(sub_url).status_code != 200:
                    test_url = re.sub(
                        r"^https\:\/\/www\.facebook\.com\/pages\/",
                        "https://www.facebook.com/", sub_url, 0)
                    if requests.get(test_url).status_code == 200:
                        sub_url = test_url

                # if the url does not contain photo/media-parts we do not need any get parameters
                if sub_url.find('profile.php') == -1 and sub_url.find(
                        '/media/set/') == -1 and sub_url.find('/photo/') == -1:
                    sub_url = re.findall(r"^([^?]+)", sub_url)[0]

                # if we got a new url so far, we tag this element as changed
                if str(initial_url) != str(sub_url):
                    # print(initial_url + ' > ' + sub_url)
                    tag.attrib['v'] = sub_url
                    element_changed = True

                # sometimes we get redirected via the login-page. in this case we will have to html-decrypt the next-param and replay the get-param-removal
                r = requests.get(tag.attrib['v'], headers=HEADERS)
                if r.url != tag.attrib['v']:
                    login_url = re.findall(
                        r"https:\/\/www\.facebook\.com\/login\/\?next\=(.*)",
                        r.url)
                    if login_url:
                        decoded_url = urllib.parse.unquote(login_url[0])
                        if decoded_url.find(
                                'profile.php') == -1 and decoded_url.find(
                                    '/media/set/') == -1 and decoded_url.find(
                                        '/photo/') == -1:
                            decoded_url = re.findall(r"^([^?]+)",
                                                     decoded_url)[0]
                        if initial_url != decoded_url:
                            tag.attrib['v'] = decoded_url

                    else:
                        if initial_url != r.url:
                            # if we can reach that url we take it as replacement otherwise remove it
                            if r.status_code == 200:
                                tag.attrib['v'] = r.url
                            if r.status_code == 404:
                                tag.attrib['v'] = ''
                                # print(r.url + ' >> DELETED because of 404')

                # strip known suffixes from the url
                for suffix in list_suffix2kill:
                    if tag.attrib['v'].endswith('/' + suffix):
                        tag.attrib['v'] = tag.attrib['v'][:-(len(suffix) + 1)]
                    if tag.attrib['v'].endswith('/' + suffix + '/'):
                        tag.attrib['v'] = tag.attrib['v'][:-(len(suffix) + 2)]

                # re-apply the GET-parameter removal and the 404 check to the (possibly) changed url
                if tag.attrib['v'] != initial_url:
                    # print(initial_url + ' >>> ' + tag.attrib['v'])
                    if tag.attrib['v'].find(
                            'profile.php') == -1 and tag.attrib['v'].find(
                                '/media/set/') == -1 and tag.attrib['v'].find(
                                    '/photo/') == -1 and re.findall(
                                        r"^([^?]+)", tag.attrib['v']):
                        tag.attrib['v'] = re.findall(r"^([^?]+)",
                                                     tag.attrib['v'])[0]
                    if tag.attrib['v'] != '' and requests.get(
                            tag.attrib['v'],
                            headers=HEADERS).status_code == 404:
                        tag.attrib['v'] = ''
                    # print(initial_url + ' >> ' + tag.attrib['v'])

                sys.stdout.write('[' + str(LIMIT_COUNTER) + '/' +
                                 str(bar_max) + '] ' + initial_url +
                                 "\n >>> " + tag.attrib['v'] + " \n")
                p_bar.update(LIMIT_COUNTER)

        if element_changed:
            if LIMIT_COUNTER + 1 > int(ARGS.offset) or (LIMIT_COUNTER == 0 and
                                                        int(ARGS.offset) == 0):
                # set modify-tag if element is in range
                element.attrib['action'] = 'modify'
                empty_tags = element.findall("tag[@v='']")
                if empty_tags:
                    for empty_tag in empty_tags:
                        element.remove(empty_tag)
                RESULT_ROOT.append(element)
            LIMIT_COUNTER += 1
            if int(ARGS.limit) > 0 and LIMIT_COUNTER >= int(ARGS.limit):
                p_bar.finish()
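
# --- Hedged sketch (not part of the original script) ---
# The two recurring steps above -- dropping GET parameters unless the url
# points at a profile/media/photo resource, and url-decoding the login
# redirect's "next" parameter -- can be isolated into small helpers. The
# helper names below are illustrative only; the original script works inline
# on the OSM tag attributes.
import re
import urllib.parse


def strip_get_params(url):
    """Drop GET parameters unless the url needs them."""
    if any(part in url for part in ('profile.php', '/media/set/', '/photo/')):
        return url
    match = re.findall(r"^([^?]+)", url)
    return match[0] if match else url


def resolve_login_redirect(redirected_url):
    """If we landed on the Facebook login page, recover the url-encoded target."""
    target = re.findall(
        r"https:\/\/www\.facebook\.com\/login\/\?next\=(.*)", redirected_url)
    return urllib.parse.unquote(target[0]) if target else redirected_url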
コード例 #22
0
    def train(self):

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.per_process_gpu_memory_fraction = 0.45
        with tf.Session(config=config) as sess:
            with tf.device("/gpu:%d" % cfg.GPU_ID):
                counter = self.build_model(sess)
                saver = tf.train.Saver(tf.all_variables(),
                                       keep_checkpoint_every_n_hours=2)

                # summary_op = tf.merge_all_summaries()
                summary_writer = tf.summary.FileWriter(self.log_dir,
                                                       sess.graph)

                keys = ["d_loss", "g_loss"]
                log_vars = []
                log_keys = []
                for k, v in self.log_vars:
                    if k in keys:
                        log_vars.append(v)
                        log_keys.append(k)
                        # print(k, v)
                generator_lr = cfg.TRAIN.GENERATOR_LR
                discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
                num_embedding = cfg.TRAIN.NUM_EMBEDDING
                lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH
                number_example = self.dataset.train._num_examples
                updates_per_epoch = int(number_example / self.batch_size)
                epoch_start = int(counter / updates_per_epoch)
                for epoch in range(epoch_start, self.max_epoch):
                    widgets = [
                        "epoch #%d|" % epoch,
                        Percentage(),
                        Bar(),
                        ETA()
                    ]
                    pbar = ProgressBar(maxval=updates_per_epoch,
                                       widgets=widgets)
                    pbar.start()

                    if epoch % lr_decay_step == 0 and epoch != 0:
                        generator_lr *= 0.5
                        discriminator_lr *= 0.5

                    all_log_vals = []
                    for i in range(updates_per_epoch):
                        pbar.update(i)
                        # training d
                        images, wrong_images, embeddings, _, _ =\
                            self.dataset.train.next_batch(self.batch_size,
                                                          num_embedding)

                        feed_dict = {
                            self.images: images,
                            self.wrong_images: wrong_images,
                            self.embeddings: embeddings,
                            self.generator_lr: generator_lr,
                            self.discriminator_lr: discriminator_lr
                        }
                        # train d
                        feed_out = [
                            self.discriminator_trainer, self.d_sum,
                            self.hist_sum, log_vars, self.embeddings,
                            self.fake_embeddings
                        ]

                        for j in range(self.opt.dis_steps):
                            _, d_sum, hist_sum, log_vals, real_emb, fake_emb = sess.run(
                                feed_out, feed_dict)

                        summary_writer.add_summary(d_sum, counter)
                        summary_writer.add_summary(hist_sum, counter)
                        all_log_vals.append(log_vals)

                        # train g
                        feed_out = [self.generator_trainer, self.g_sum]
                        for k in range(self.opt.gen_steps):
                            _, g_sum = sess.run(feed_out, feed_dict)

                        summary_writer.add_summary(g_sum, counter)
                        # save checkpoint
                        counter += 1
                        if counter % self.snapshot_interval == 0:
                            snapshot_path = "%s/%s_%s.ckpt" %\
                                             (self.checkpoint_dir,
                                              self.exp_name,
                                              str(counter))
                            fn = saver.save(sess, snapshot_path)

                            EMB = np.concatenate((real_emb, fake_emb))
                            y = np.zeros(EMB.shape[0])
                            y[:real_emb.shape[0]] = 1

                            print("Model saved in file: %s" % fn)

                    img_sum = self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY)
                    summary_writer.add_summary(img_sum, counter)

                    avg_log_vals = np.mean(np.array(all_log_vals), axis=0)
                    dic_logs = {}
                    for k, v in zip(log_keys, avg_log_vals):
                        dic_logs[k] = v
                        # print(k, v)

                    log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k]))
                                         for k in dic_logs)
                    print("Epoch %d | " % (epoch) + log_line)
                    sys.stdout.flush()
                    if np.any(np.isnan(avg_log_vals)):
                        raise ValueError("NaN detected!")
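
# --- Hedged sketch (not part of the original trainer) ---
# The in-place halving above (generator_lr *= 0.5 once every lr_decay_step
# epochs) is equivalent to deriving the rate directly from the epoch index,
# which can be handy when resuming from a checkpoint mid-schedule:
def decayed_lr(initial_lr, epoch, lr_decay_step):
    """Learning rate after one halving per completed decay interval."""
    return initial_lr * (0.5 ** (epoch // lr_decay_step))

# e.g. decayed_lr(2e-4, 120, 50) == 2e-4 * 0.25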
コード例 #23
0
def europe_data(network_objects, network_path, data_path, nuts_path, europe_data_path):
    if network_path is not None:
        out_path = str(network_path) + '/freight_data'
        network_files = str(network_path) + '/network_files'

        nuts_path = str(data_path) + '/nuts_borders'
        europe_data_path = str(data_path) + '/GQGV_2014_Mikrodaten.csv'
    else:
        # keep results in memory only when no network_path is given (avoids an undefined out_path below)
        out_path = None

    if out_path is None or not os.path.isfile(str(out_path) + '/nuts_europe_dict.pkl'):
        print(datetime.datetime.now(), 'Europe data manipulating begins ...')
        if network_path is not None:
            # Find best graph:
            if os.path.isfile(str(network_files) + "/eu_connected_graph_bytime.gpickle"):
                graph_path = str(network_files) + '/eu_connected_graph_bytime.gpickle'
                print(datetime.datetime.now(), 'Graph loaded: eu_connected_graph_bytime')
            elif os.path.isfile(str(network_path) + "/bc_official/eu_network_graph_with_official_bc.gpickle"):
                graph_path = str(network_path) + '/bc_official/eu_network_graph_with_official_bc.gpickle'
                print(datetime.datetime.now(), 'Graph loaded: eu_network_graph_with_official_bc')
            else:
                graph_path = str(network_files) + '/eu_network_largest_graph_bytime.gpickle'
                print(datetime.datetime.now(), 'Graph loaded: eu_network_largest_graph_bytime')

            # Check if out_path exists or create it
            if not os.path.exists(str(out_path)):
                os.makedirs(str(out_path))
                print(datetime.datetime.now(), 'Directory created.')
            else:
                print(datetime.datetime.now(), 'Directory exists.')

            # Create dictionary with nuts id and coordinates of centroid and closest node id to it
            # IMPORT G graph with largest network
            G = nx.read_gpickle(str(graph_path))

            # # IMPORT nodes_europe
            # file = open(str(network_files) + "/europe_nodes_dict2056.pkl", 'rb')
            file = open(str(network_files) + "/europe_nodes_dict4326.pkl", 'rb')
            nodes_europe_2056 = pickle.load(file)
            file.close()
        else:
            out_path = None
            G = network_objects[0]
            nodes_europe_2056 = network_objects[2]

        print(datetime.datetime.now(), 'Graph has: ' + str(nx.number_connected_components(G))
              + ' island(s) with ' + str(G.number_of_nodes()) + '/' + str(G.number_of_edges()) + ' (Nnodes/Nedges)')
        print(datetime.datetime.now(), 'Nnodes in nodes_europe_2056: ' + str(len(nodes_europe_2056)))


    if out_path is None or not os.path.isfile(str(out_path) + '/nuts_europe_dict.pkl'):
        # Merge data from NUTS into one gdf:
        unique_nuts_gdf = nuts_merging(nuts_path)

        # Build the tree for the KDTree nearest-neighbour search; G only contains start and end nodes
        # OPTION 3: input only the nodes of the largest network in G
        # Walk both id lists in ascending order and collect the coordinates of every node that is in G
        # (assumes nodes_europe_2056 iterates over its keys in ascending numeric order)
        G_nodes = list(G.nodes)
        G_nodes.sort(key=float)
        G_lonlat = []
        i = 0
        node_sel = G_nodes[i]
        for id in list(nodes_europe_2056):
            if node_sel == int(id):
                lonlat = nodes_europe_2056[id]
                G_lonlat.append(lonlat)
                if i < len(G_nodes) - 1:
                    i += 1
                    node_sel = G_nodes[i]
        print(datetime.datetime.now(), 'KDTree has: ' + str(len(G_lonlat)) + ' nodes.')
        print('------------------------------------------------------------------------')

        nuts_europe = {}
        tree = spatial.KDTree(G_lonlat)
        pbar = ProgressBar(widgets=[Bar('>', '[', ']'), ' ',
                                            Percentage(), ' ',
                                            ETA()], maxval=len(unique_nuts_gdf))
        # iterate with the progress bar built above
        for i in pbar(range(len(unique_nuts_gdf))):
            nuts_id = unique_nuts_gdf.iloc[i]['NUTS_ID']
            polygon = unique_nuts_gdf.iloc[i]['geometry']
            centroid = (polygon.centroid.x, polygon.centroid.y)

            # this gives the closest nodes id from the nuts centroid coordinates
            nn = tree.query(centroid)
            coord = G_lonlat[nn[1]]
            closest_node_id = int(list(nodes_europe_2056.keys())[list(nodes_europe_2056.values()).index((coord[0], coord[1]))])

            # stores as dictionary
            nuts_europe[nuts_id] = [centroid, closest_node_id]
            # print(datetime.datetime.now(), i, end="\r")

        # EXPORT nuts_centroid_dict TO FILE
        if out_path is not None:
            with open(str(out_path) + '/nuts_europe_dict' + '.pkl', 'wb') as f:
                pickle.dump(nuts_europe, f, pickle.HIGHEST_PROTOCOL)

        print(datetime.datetime.now(), len(nuts_europe))
        print('------------------------------------------------------------------------')
    elif not os.path.isfile(str(out_path) + "/od_europesum_df.csv"):
        # CHECKPOINT: load nuts dictionary
        file = open(str(out_path) + '/nuts_europe_dict.pkl', 'rb')
        nuts_europe = pickle.load(file)
        file.close()
        print(datetime.datetime.now(), 'Nnuts in nuts_europe: ' + str(len(nuts_europe)))
        print('------------------------------------------------------------------------')

    if out_path is None or not os.path.isfile(str(out_path) + '/od_europesum_df.csv'):
        # load europe OD matrix ('GQGV_2014_Mikrodaten.csv' file)
        od_europe_df = pd.read_csv(europe_data_path, sep=",")

        # select relevant columns from dataframe
        od_europesum_df = od_europe_df[
            ['OID', 'ORIGIN', 'DESTINATION', 'BORDER_CROSSING_IN', 'BORDER_CROSSING_OUT', 'KM_PERFORMANCE', 'WEIGHTING_FACTOR',
             'DIVISOR']]

        # add columns (o_node_id, d_node_id) to the dataframe with the closest node for the origin and destination NUTS
        # also creating list of NUTS ('missing_nuts') which are not defined in the dictionary nuts_europe
        missing_nuts = []

        def od_func_eu(origin, destination, rowname):
            try:
                o_node_id = nuts_europe[origin][1]
            except KeyError:
                o_node_id = None
                if origin not in missing_nuts:
                    missing_nuts.append(origin)
            try:
                d_node_id = nuts_europe[destination][1]
            except KeyError:
                d_node_id = None
                if destination not in missing_nuts:
                    missing_nuts.append(destination)

            # print(datetime.datetime.now(), rowname, end="\r")
            return pd.Series([o_node_id, d_node_id])

        od_europesum_df[['o_node_id', 'd_node_id']] = od_europesum_df.apply(
            lambda row: od_func_eu(row['ORIGIN'], row['DESTINATION'], row.name), axis=1)

        df = pd.DataFrame(data={"missing_nuts": missing_nuts})
        # drop rows whose nuts are not defined in the dictionary
        od_europesum_df = od_europesum_df.dropna()

        if out_path is not None:
            df.to_csv(str(out_path) + "/missing_nuts.csv", sep=',', index=False)
            od_europesum_df.to_csv(str(out_path) + "/od_europesum_df.csv", sep=",", index=None)

        print(datetime.datetime.now(), 'Process of manipulating europe data finished')
        print('------------------------------------------------------------------------')
    else:
        print(datetime.datetime.now(), 'Manipulated data already exists.')
        print('------------------------------------------------------------------------')


    # Last filter for 2_routing
    if os.path.isfile(str(out_path) + "/od_incorrect_DABC.csv") is True:
        od_incorrect_DABC = pd.read_csv(str(out_path) + "/od_incorrect_DABC.csv", encoding='latin1')
        print('Nroutes in od_incorrect_DABC: '+ str(len(od_incorrect_DABC)))
        od_incorrect_DABC.head()

        # DROP ROWS FROM incorrect dataframe from 1_ROUTING
        droprows = []
        print(len(od_europesum_df))
        for i in range(len(od_europesum_df)):
            print(i, end="\r")
            oid = od_europesum_df.iloc[i]['OID']
            if oid in list(od_incorrect_DABC['OID']):
                droprows.append(i)
        od_europesum_df = od_europesum_df.drop(od_europesum_df.index[droprows])
        od_europesum_df.to_csv(str(out_path) + "/od_europesum_df.csv", sep=",", index=None, encoding='latin1')

        print(len(od_europesum_df))
        print('------------------------------------------------------------------------')
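
# --- Hedged sketch (not part of the original module) ---
# The centroid-to-closest-node lookup used above boils down to a single
# KDTree query; `tree` is assumed to be built once from the same coordinate
# list (G_lonlat) as in europe_data(), and `polygon` is a shapely geometry.
from scipy import spatial


def closest_node_index(tree, polygon):
    """Index (into the list used to build `tree`) of the node nearest the polygon centroid."""
    centroid = (polygon.centroid.x, polygon.centroid.y)
    _dist, idx = tree.query(centroid)   # nearest neighbour of the centroid
    return idx

# usage sketch: tree = spatial.KDTree(G_lonlat); closest_node_index(tree, nuts_row['geometry'])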



# CH DATA
# -------------------------------------------------------------------------------
# Similar process to the europe data:
# CREATE DICTIONARY WITH CENTROID COORDINATES AND CLOSEST NODE IN SELECTED NETWORK OF EVERY PLZ
# plz_ch = {}
# tree = spatial.KDTree(G_lonlat)
#
# def closest_node(centroid, plz_id):
#     nn = tree.query(centroid, k=10000)
#     in_ch = ch_border.contains(Point(centroid))
#     # this condition ensures to find a closest node of the network that is in the same side of the border than the centroid
#     if in_ch == True:
#         for point in nn[1]:
#             coord = G_lonlat[point]
#             node_in_ch = ch_border.contains(Point(coord[0], coord[1]))
#             if node_in_ch == True:
#                 closest_node_id = int(
#                     list(nodes_europe_2056.keys())[list(nodes_europe_2056.values()).index((coord[0], coord[1]))])
#                 break
#     elif in_ch == False:
#         for point in nn[1]:
#             coord = G_lonlat[point]
#             node_in_ch = ch_border.contains(Point(coord[0], coord[1]))
#             if node_in_ch == False:
#                 closest_node_id = int(
#                     list(nodes_europe_2056.keys())[list(nodes_europe_2056.values()).index((coord[0], coord[1]))])
#                 break
#     plz_ch[str(plz_id)] = [centroid, closest_node_id,
#                            in_ch]  # string because in the freight data plz are stored as strings, so later to match
#
# for i in range(0, len(plz_gdf)):
#     plz_id = plz_gdf.iloc[i]['PLZ']
#     if plz_id not in list(plz_ch):
#         poly_list = []
#         # this searches for different polygons with the same PLZ code, to find the centroid of the mixture of all of them
#         for j in range(i, len(plz_gdf)):
#             if plz_gdf.iloc[j]['PLZ'] == plz_id:
#                 polygon = plz_gdf.iloc[j]['geometry']
#                 poly_list.append(polygon)
#
#     boundary = gpd.GeoSeries(cascaded_union(poly_list))
#     centroid = boundary[0].centroid.coords[0]
#     funct_sol = closest_node(centroid, plz_id)
#     print(i, end="\r")
# # EXPORT nuts_centroid_dict TO FILE
# with open(str(out_path) + '\plz_ch_dict' + '.pkl', 'wb') as f:
#     pickle.dump(plz_ch, f, pickle.HIGHEST_PROTOCOL)
#
# print(len(plz_ch))
# # plz_ch
#
# #JOURNEYCH data
# od_ch_df = pd.read_csv(str(in_path)+'/freight_data/freight/gte/GTE_2017/Donnees/journeych.csv', sep=";",
#                        low_memory=False)
# # selects relevant columns from the od matrix
# od_chsum_df = od_ch_df [['ernr','fromPlz', 'toPlz', 'fromNuts', 'toNuts']]
# for i in range(0,len(od_chsum_df)):
#     od_chsum_df.set_value(i, 'fromPlz', od_chsum_df.iloc[i]['fromPlz'].rstrip())
#     od_chsum_df.set_value(i, 'toPlz', od_chsum_df.iloc[i]['toPlz'].rstrip())
# #     od_chsum_df.at[i, 'fromPlz'] = od_chsum_df.iloc[i]['fromPlz'].rstrip() #in case set_value gets removed from pandas
# #     od_chsum_df.at[i, 'toPlz'] = od_chsum_df.iloc[i]['toPlz'].rstrip() #in case set_value gets removed from pandas
# #     od_chsum_df.iloc[i]['fromPlz'] = od_chsum_df.iloc[i]['fromPlz'].rstrip() #TOO SLOW
# #     od_chsum_df.iloc[i]['toPlz'] = od_chsum_df.iloc[i]['toPlz'].rstrip() #TOO SLOW
#     print (i, end="\r")
#
# # THIS ADDS THE GROSSINGFACTOR TO THE MANIPULATED DATA from switzerland
# od_chw_df = pd.read_csv(str(in_path)+'/freight_data/freight/gte/GTE_2017/Donnees/week.csv', sep=";", low_memory=False)
# od_chwsum_df = od_chw_df [['ernr','grossingFactor']]
# od_chsum_df=od_chsum_df.merge(od_chwsum_df)
# od_chsum_df.head()
#
# # add columns (o_node_id, d_node_id) to dataframe with closest node depending origin and destination PLZ
# # also creating list of PLZs ('missing_plz') which are not defined in the dictionary plz_ch
# missing_plz = []
#
#
# def od_func_ch(origin, destination, rowname):
#     try:
#         o_node_id = int(plz_ch[origin][1])
#     except:
#         o_node_id = None
#         if origin not in missing_plz:
#             missing_plz.append(origin)
#     try:
#         d_node_id = int(plz_ch[destination][1])
#     except:
#         d_node_id = None
#         if destination not in missing_plz:
#             missing_plz.append(destination)
#
#     print(rowname, end="\r")
#     return pd.Series([o_node_id, d_node_id])
#
#
# od_chsum_df[['o_node_id', 'd_node_id']] = od_chsum_df.apply(
#     lambda row: od_func_ch(row['fromPlz'], row['toPlz'], row.name), axis=1)
#
# df = pd.DataFrame(data={"missing_plz": missing_plz})
# df.to_csv("./missing_plz.csv", sep=',', index=False)
#
# od_chsum_df = pd.DataFrame.dropna(od_chsum_df)  # in case there are missing nuts not defined in the dictionary
# od_chsum_df.to_csv(str(out_path) + "/od_chsum_df.csv", sep=",", index=None)
#
# print(len(od_chsum_df))
# print(len(missing_plz))
# od_chsum_df.head()
コード例 #24
0
    def play_random_games(self,
                          num_games,
                          save_to_disk=False,
                          save_every=100,
                          save_path=''):
        '''Simulates a specified number of games, where each game involves two agents
        randomly selected from the population.

        To periodically save the population to disk, pass save_to_disk=True,
        set how often to save with save_every, and the destination with save_path
        (defaults to the current working path).'''

        #Initialize the progress bar
        progress = ProgressBar(widgets=[Percentage(),
                                        Bar(), ETA()],
                               maxval=num_games).start()

        if save_to_disk:
            # Make sure the path does not already exist
            while path.exists(save_path):
                try:
                    old_file_num = int(save_path[save_path.find('_') + 1:])
                    new_file_num = old_file_num + 1
                    save_path = save_path[0:save_path.find('_')] + '_' + str(
                        new_file_num)
                except ValueError:
                    save_path = save_path + "_1"

            self.folder = save_path
            mkdir(save_path)
            self.population.save(path=save_path, suffix='0')

        #Pre-sample the stimuli
        stimuli = self.color_grid.sample(
            num_games, 2
        )  # list of lists (len=2), where each element is a 2-tuple representing a chip

        #Run the games
        for i in range(num_games):

            #Randomly select the agents for a single game
            agent_keys = self.population.get_random_pair()

            #Save the game history
            #self.game_history.append([agent_keys, stimuli[i]])	# stimuli[i] = [chip1, chip2] where chip1,2 are 2-tuples

            #Play the game
            self.play_game(agent_keys, stimuli[i])

            #Update the status bar
            progress.update(i + 1)

            #Save to disk
            if save_to_disk and (i % save_every) == 0:
                self.population.save(path=save_path, suffix=str(i))

        # Save the last iteration if it was not already saved
        if save_to_disk and (i % save_every) != 0:
            self.population.save(path=save_path, suffix=str(i))

        #End the status bar
        progress.finish()
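
    # --- Hedged usage sketch (not part of the original class) ---
    # Assuming `sim` is an instance of the (unnamed here) simulation class this
    # method belongs to, a run that checkpoints the population every 500 games
    # could look like:
    #
    #   sim.play_random_games(10000,
    #                         save_to_disk=True,
    #                         save_every=500,
    #                         save_path='runs/run_1')
    #
    # If 'runs/run_1' already exists, the while-loop above bumps the numeric
    # suffix (runs/run_2, runs/run_3, ...) before creating the directory.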
コード例 #25
0
#
#################################################################################

import argparse
import numpy as np
from sklearn.model_selection import StratifiedKFold
from progressbar import AnimatedMarker, Bar, BouncingBar, Counter, ETA, \
    FileTransferSpeed, FormatLabel, Percentage, \
    ProgressBar, ReverseBar, RotatingMarker, \
    SimpleProgress, Timer

# progress bar settings
widgets = [
    'Progress: ',
    Percentage(), ' ',
    Bar(marker=RotatingMarker()), ' ',
    ETA()
]


def parseFileName(filepath):

    tokens = filepath.split("/")
    filename = tokens[len(tokens) - 1]
    tokens = filename.split(".")
    filename_no_ext = tokens[len(tokens) - 2]
    return filename_no_ext
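
# --- Hedged sketch (not part of the original script) ---
# A shorter alternative for simple paths, using os.path:
import os


def parse_file_name_alt(filepath):
    """Filename without directory or extension, e.g. 'data/run1.csv' -> 'run1'."""
    return os.path.splitext(os.path.basename(filepath))[0]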


def parseOutputFolderPath(filepath):
コード例 #26
0
    def play_random_games(self,
                          num_games,
                          save_to_disk=False,
                          save_every=100,
                          save_path=''):
        '''Simulates a specified number of games, where each game involves two agents
        randomly selected from the population.

        To periodically save the population to disk, pass save_to_disk=True,
        set how often to save with save_every, and the destination with save_path
        (defaults to the current working path).'''

        #Initialize the progress bar
        progress = ProgressBar(widgets=[Percentage(),
                                        Bar(), ETA()],
                               maxval=num_games).start()

        if save_to_disk:
            #Make sure the path does not already exist. If it does, a new file name will be created adding '_n' to the end of
            #save_path, where n is a positive integer and is determined by whatever the last instance of save_path was saved as.
            while path.exists(save_path):
                #save_path already exists
                try:
                    old_file_num = int(save_path[save_path.find('_') + 1:])
                    new_file_num = old_file_num + 1
                    save_path = save_path[0:save_path.find('_')] + '_' + str(
                        new_file_num)
                except ValueError:
                    save_path = save_path + "_1"

            self.folder = save_path
            mkdir(save_path)
            self.population.save(path=save_path, suffix='0')

        # Pre-sample the stimuli
        stimuli = self.color_circle.sample([num_games, 2])

        #Run the games
        for i in range(num_games):
            #Randomly select the agents and the stimuli for a single game
            agent_keys = self.population.get_random_pair()

            # Save the game history
            #self.game_history.append([agent_keys, stimuli[i]])

            # Play the game
            self.play_game(agent_keys, stimuli[i])

            #Update the status bar
            progress.update(i + 1)

            #Save to disk
            if save_to_disk and (i % save_every) == 0:
                self.population.save(path=save_path, suffix=str(i))

        # Save the last iteration if it was not already saved
        if save_to_disk and (i % save_every) != 0:
            self.population.save(path=save_path, suffix=str(i))

        #End the status bar
        progress.finish()
コード例 #27
0
def NETSGraph(results, NETS_edges, node_labeler, node_type, edge_labeler):
    '''
    Function takes a json file of query results, a list of NETS edges, node and edge metadata dictionaries, and a
    dictionary containing NETS edge information by BIO node. Using these items the function creates the directed
    OWL-NETS abstraction network. Node metadata includes: labels (a list of human readable labels); id (the endpoint
    database identifiers); and bio (the NETS node type). Edge metadata includes: labels (human readable label for the
    edge between two NETS nodes) and id (the ontology concept term used to link the NETS nodes).
    :param results: json file containing the query results from endpoint
    :param NETS_edges: list of lists, where each list is a NETS edge and the order specifies a directional relationship
    :param node_labeler: node metadata nested lists (list[0] contains the NETS nodes label triples, list[1] contains
    the NETS nodes identifier triples)
    :param node_type: dictionary with BIO node as key and set of NETS node types as value
    :param edge_labeler: dictionary where the keys are the NETS edges and the values are the edge labels
    :return: OWL-NETS directed graph
    '''
    print 'Started building OWL-NETS graph'

    # initialize progress bar
    widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')]
    pbar = ProgressBar(widgets=widgets,
                       maxval=len(results['results']['bindings']))

    NETS_graph = nx.DiGraph()

    for res in pbar(results['results']['bindings']):
        for edge in NETS_edges:

            i = res[str(
                edge[0].strip('?').encode('utf8'))]['value'].encode('utf8')
            j = res[str(
                edge[1].strip('?').encode('utf8'))]['value'].encode('utf8')

            # add first node in edge
            NETS_graph.add_node(
                min(node_labeler[edge[0].strip('?')][i]['label'], key=len),
                labels=node_labeler[edge[0].strip('?')][i]['label'],
                id=node_labeler[edge[0].strip('?')][i]['id'],
                bio=i,
                type='-'.join(list(node_type[i])))

            # add second node in edge
            NETS_graph.add_node(
                min(node_labeler[edge[1].strip('?')][j]['label'], key=len),
                labels=node_labeler[edge[1].strip('?')][j]['label'],
                id=node_labeler[edge[1].strip('?')][j]['id'],
                bio=j,
                type='-'.join(list(node_type[j])))
            # add edge
            NETS_graph.add_edge(
                min(node_labeler[edge[0].strip('?')][i]['label'], key=len),
                min(node_labeler[edge[1].strip('?')][j]['label'], key=len),
                labels=res[(edge_labeler[tuple(edge)]['label']
                            ).strip('?')]['value'].encode('utf8'),
                id=(edge_labeler[tuple(edge)]['id']).strip('?'),
                edge='-'.join([edge[0].strip('?'), edge[1].strip('?')]))

    # close the progress bar
    pbar.finish()
    print 'Finished building OWL-NETS graph'
    print '\n'

    # print information about graph
    print 'Directed OWL-NETS Graph has ' + str(len(
        NETS_graph.nodes())) + ' nodes, ' + str(len(
            NETS_graph.edges())) + ' edges, and ' + str(
                nx.number_connected_components(
                    NETS_graph.to_undirected())) + ' connected component(s)'

    return NETS_graph
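
# --- Hedged usage sketch (not part of the original module) ---
# Assuming the inputs were produced by the query/metadata preparation steps
# shown elsewhere in this collection, the returned graph can be stored or
# inspected with ordinary networkx calls:
#
#   graph = NETSGraph(results, NETS_edges, node_labeler, node_type, edge_labeler)
#   nx.write_gml(graph, 'owl_nets_graph.gml')
#   print 'nodes:', graph.number_of_nodes(), 'edges:', graph.number_of_edges()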
コード例 #28
0
        time = odata_pt[:, 1]
        tindex = abs(time - pf.current_time.v).argmin()

    if args.subsample >= 0 and pf.h.max_level - args.undersample < args.subsample:
        print 'ERROR: Subsample must be less than max refine level - undersample.'
        sys.exit()

    maxval = np.empty(len(args.vars))
    minval = np.empty(len(args.vars))
    maxval.fill(-float("inf"))
    minval.fill(float("inf"))
    vals = list()
    pbar = ProgressBar(widgets=[
        'Determining histogram bounds and initial pass of data: ',
        Percentage(),
        Bar(), ' ',
        ETA()
    ],
                       maxval=len(pf.index.grids)).start()
    for cnt, g in enumerate(pf.index.grids):
        if g.Level > pf.h.max_level - args.undersample: continue
        if len(g.Children
               ) != 0 and g.Level != pf.h.max_level - args.undersample:
            continue

        evals = list()
        vvals = list()
        #vvals = g.get_data(args.var).ravel()
        for e, ev in enumerate(args.vars):
            vvals.append(g[ev].ravel())
        dvals = g["dens"].ravel().v * g["c12 "].ravel().v
コード例 #29
0
semester_codes = {'0': 'INTERIM', '1': 'SPRING', '6': 'SUMMER', '9': 'FALL'}

default_pickup = {
    'GEN': 'MUSME',
    'BUS': 'BUS',
    'MM': 'MUSME',
    'HEALTH': 'HLTH',
    'OXF': 'OXFD',
    'CHEM': 'CHEM',
    'THE': 'THEO',
    'LAW': 'LAW'
}

# widget for progress bar
pbar_widget = [Percentage(), ' ', ETA(), Bar()]


def unnone(str):
    return str if str is not None else ''


# get notes by type for later reference so it will not take 2 hours to run 82,000 separate queries
def get_notes(type, sep='; '):

    query = ''' SELECT n.target_id id, IFNULL(group_concat(n.note separator %s), '') notes
                FROM notes n
                WHERE n.type = %s
                GROUP BY n.target_id '''

    cursor = db.cursor(MySQLdb.cursors.DictCursor)
コード例 #30
0
        idxs_max = np.argsort(rsss)[:CLUSTERKEYSIZE]
        topaps[i] = macs[idxs_max]
        joinaps.append('|'.join(topaps[i]))
        toprss[i] = '|'.join(rsss[idxs_max])
    rawrmp[:,col_macs] = np.array(joinaps)
    rawrmp[:,col_rsss] = np.array(toprss)
    print 'Done'

    # Clustering heuristics.
    fp_field_names = FP_FIELD_NAMES['outdoor'] if 'lat' in csv_cols else FP_FIELD_NAMES['indoor']
    idxs_fp = [ csv_cols[col] for col in fp_field_names ]
    idx_time = fp_field_names.index('time')
    idx_rsss = fp_field_names.index('rsss')
    n_inserts = { 'n_newcids':0, 'n_newfps':0 }
    if verb:
        widgets = ['Incr-Clustering: ', Percentage(), ' ', Bar(marker=RotatingMarker())]
        pbar = ProgressBar(widgets=widgets, maxval=num_rows*10).start()
    for idx, wlanmacs in enumerate(topaps):
        # Drop FPs with no wlan info.
        if not wlanmacs[0].strip() and len(wlanmacs) == 1:
            continue
        fp = rawrmp[ idx, idxs_fp ]
        found_cluster, result = search_cluster(macs=wlanmacs, fp=fp, wppdb=wppdb, idx_rsss=idx_rsss)
        fp = result['fp']
        # Strip time & rsss field.
        fp[idx_time] = fp[idx_time].replace(' ','')
        fp[idx_rsss] = fp[idx_rsss].replace(' ','')
        if not found_cluster:
            # Insert into cidaps/cfps with a new clusterid.
            new_cid = wppdb.addCluster(result['fp_macs'])
            wppdb.addFps(cid=new_cid, fps=[fp])