Example #1
    def to_orange_network(self):
        """Convert the current network to >>Orange<< NetworkX standard. To use
        :obj:`Orange.network` in Orange widgets, set node IDs to be range 
        [0, no_of_nodes - 1].
        
        """

        G = self.__class__()
        node_list = sorted(self.nodes())
        node_to_index = dict(zip(node_list, range(self.number_of_nodes())))
        index_to_node = dict(zip(range(self.number_of_nodes()), node_list))

        G.add_nodes_from(zip(range(self.number_of_nodes()), [copy.deepcopy(self.node[nid]) for nid in node_list]))
        G.add_edges_from(
            ((node_to_index[u], node_to_index[v], copy.deepcopy(self.edge[u][v])) for u, v in self.edges())
        )

        for nid in G.node.keys():
            G.node[nid]["old_id"] = index_to_node[nid]

        if self.items():
            G.set_items(self.items())

        if self.links():
            G.set_links(self.links())

        return G
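A minimal sketch of the re-indexing idiom used above: dict(zip(...)) builds the forward and reverse maps between node IDs and positions (standalone, hypothetical data; no Orange or networkx needed).

nodes = ["c", "a", "b"]
node_list = sorted(nodes)
node_to_index = dict(zip(node_list, range(len(node_list))))  # {'a': 0, 'b': 1, 'c': 2}
index_to_node = dict(zip(range(len(node_list)), node_list))  # {0: 'a', 1: 'b', 2: 'c'}
assert all(index_to_node[node_to_index[n]] == n for n in nodes)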
Example #2
def rectilinear(vertices, periodic=(), name="rect"):
    "rectilinear mesh"

    ndims = len(vertices)
    indices = numpy.ogrid[tuple(slice(len(n) - 1) for n in vertices)]
    domainelem = element.Element(ndims=ndims, vertices=[])

    vertexfmt = name + "(" + ",".join("%%%dd" % len(str(len(n) - 1)) for n in vertices) + ")"
    vertexobjs = util.objmap(
        lambda *index: element.PrimaryVertex(vertexfmt % index), *numpy.ogrid[tuple(slice(len(n)) for n in vertices)]
    )
    for idim in periodic:
        tmp = numeric.bringforward(vertexobjs, idim)
        tmp[-1] = tmp[0]

    structure = util.objmap(
        lambda *index: element.QuadElement(
            ndims=ndims,
            parent=(
                domainelem,
                element.AffineTransformation(
                    offset=[n[i] for n, i in zip(vertices, index)],
                    transform=numpy.diag([n[i + 1] - n[i] for n, i in zip(vertices, index)]),
                ),
            ),
            vertices=vertexobjs[tuple(slice(i, i + 2) for i in index)].ravel(),
        ),
        *indices
    )
    topo = topology.StructuredTopology(structure)
    coords = GridFunc(domainelem, structure, vertices)
    if periodic:
        topo = topo.make_periodic(periodic)
    return topo, coords
Example #3
    def from_arrays(cls, arrays, column_names=None, **kwargs):
        """Produce :class:`ColumnDataSource` from array-like data.

        Returns:
            :class:`ColumnDataSource`
        """
        # handle list of arrays
        if any(cls.is_list_arrays(array) for array in arrays):
            list_of_arrays = copy(arrays)
            arrays = list(chain.from_iterable(arrays))
            column_names = column_names or gen_column_names(len(arrays))
            cols = copy(column_names)
            dims = kwargs.get("dims", None) or DEFAULT_DIMS

            # derive column selections
            for dim, list_of_array in zip(dims, list_of_arrays):
                sel = [cols.pop(0) for _ in list_of_array]
                kwargs[dim] = sel
        else:
            column_names = column_names or gen_column_names(len(arrays))

        # try to replace auto names with Series names
        for i, array in enumerate(arrays):
            if isinstance(array, pd.Series):
                name = array.name
                if name not in column_names and name is not None:
                    column_names[i] = name

        table = {column_name: array for column_name, array in zip(column_names, arrays)}
        return cls(df=pd.DataFrame.from_dict(data=table), **kwargs)
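The column-selection loop in miniature: names are consumed in order, one batch per dimension, so each dimension ends up with as many column names as it has arrays (illustrative stand-ins for gen_column_names and DEFAULT_DIMS).

from itertools import chain

list_of_arrays = [[[1, 2], [3, 4]], [[5, 6]]]       # two x-arrays, one y-array
arrays = list(chain.from_iterable(list_of_arrays))  # flattened: three arrays
cols = ["col0", "col1", "col2"]                     # generated names, one per array
selections = {}
for dim, group in zip(["x", "y"], list_of_arrays):
    selections[dim] = [cols.pop(0) for _ in group]
print(selections)  # {'x': ['col0', 'col1'], 'y': ['col2']}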
Example #4
File: sql.py Project: Will-So/blaze
def compute_up(expr, data, **kwargs):
    names = data.c.keys()
    assert names == expr._child.fields
    d = dict(zip(names, getattr(data, "inner_columns", data.c)))
    return sa.select(
        d[col].label(new_col) if col != new_col else d[col] for col, new_col in zip(expr._child.fields, expr.fields)
    )
Example #5
    def merge(left, right, func):
        # Effectively computes [func(x, y) for x, y in zip(left, right)]
        # Assume func(x, x) == x
        if left is right:
            return left

        if left is None:
            left, right = right, left

        default = left.default
        merge = _TreeListSub.merge
        if right is None:
            direct = [func(x, default) for x in left.direct]
            children = [merge(child, None, func) for child in left.children]
            if direct == left.direct and children == left.children:
                return left
            return _TreeListSub(default, direct, children)

        direct = [func(x, y) for x, y in zip(left.direct, right.direct)]
        children = [merge(c1, c2, func) for c1, c2 in zip(left.children, right.children)]
        if direct == left.direct and children == left.children:
            return left
        if direct == right.direct and children == right.children:
            return right
        return _TreeListSub(default, direct, children)
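The zip in merge() pairs elements of two parallel structures and assumes equal shape. For flat lists of unequal length, itertools.zip_longest with the default as fill value gives the analogous behavior (an illustrative sketch, not part of _TreeListSub):

from itertools import zip_longest

def merge_flat(left, right, func, default=0):
    # elementwise func over two lists, padding the shorter with the default
    return [func(x, y) for x, y in zip_longest(left, right, fillvalue=default)]

print(merge_flat([1, 2, 3], [10, 20], max))  # [10, 20, 3]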
Example #6
    def post(self):
        # array of time strings: "08:00 AM"
        bannedTimes = json.loads(self.request.get("bannedTimes"))
        # array of all bannedDays checkboxes (booleans)
        bannedDays = json.loads(self.request.get("bannedDays"))

        badIvals = []
        # each 2 time strings corresponds to 5 bannedDays checkboxes
        for dayBools, times in zip(misc.iterGroups(bannedDays, 5), misc.iterGroups(bannedTimes, 2)):
            badIvals += parseJSInterval(times, dayBools)

        subCodes = json.loads(self.request.get("subCodes"))
        nums = json.loads(self.request.get("nums"))
        curCRNs = json.loads(self.request.get("curCRNs"))

        classes = [t + (year, season) for t in zip(subCodes, nums)]

        try:
            clsToSections = courses.planSchedule(classes, badIvals, curCRNs)
        except Exception:
            # scheduling failed; return an empty result
            self.response.out.write(json.dumps({}))
        else:
            for cls, sections in list(clsToSections.items()):  # copy; the dict is mutated below
                clsToSections[cls[0] + " " + str(cls[1])] = sections
                del clsToSections[cls]

                for sec in sections:
                    sec["Intervals"] = [courses.reprInterval(i) for i in sec["Intervals"]]

            self.response.out.write(json.dumps(clsToSections))
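misc.iterGroups is not shown above; a plain-list stand-in makes the pairing visible: the day flags are chunked in fives, the time strings in twos, and zip() walks the two chunk streams together (hypothetical helper and data).

def iter_groups(seq, n):
    # chunk a flat list into consecutive groups of n
    return [seq[i:i + n] for i in range(0, len(seq), n)]

banned_days = [True, False, True, False, True, False, True, False, True, False]
banned_times = ["08:00 AM", "09:00 AM", "10:00 AM", "11:00 AM"]
for day_bools, times in zip(iter_groups(banned_days, 5), iter_groups(banned_times, 2)):
    print(times, day_bools)  # one (start, end) pair per group of five day flags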
Example #7
        def _print_summrized_result(result):
            raw = result["data"]["raw"]
            table_cols = ["action", "min", "median", "90%ile", "95%ile", "max", "avg", "success", "count"]
            float_cols = ["min", "median", "90%ile", "95%ile", "max", "avg"]
            formatters = dict(zip(float_cols, [cliutils.pretty_float_formatter(col, 3) for col in float_cols]))
            table_rows = []

            actions_data = utils.get_atomic_actions_data(raw)
            for action in actions_data:
                durations = actions_data[action]
                if durations:
                    data = [
                        action,
                        round(min(durations), 3),
                        round(utils.median(durations), 3),
                        round(utils.percentile(durations, 0.90), 3),
                        round(utils.percentile(durations, 0.95), 3),
                        round(max(durations), 3),
                        round(utils.mean(durations), 3),
                        "%.1f%%" % (len(durations) * 100.0 / len(raw)),
                        len(raw),
                    ]
                else:
                    data = [action, None, None, None, None, None, None, "0.0%", len(raw)]
                table_rows.append(rutils.Struct(**dict(zip(table_cols, data))))

            cliutils.print_list(
                table_rows,
                fields=table_cols,
                formatters=formatters,
                table_label="Response Times (sec)",
                sortby_index=None,
            )
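Both the formatter map and each table row rely on the same idiom: dict(zip(names, values)) stitches two parallel lists into a mapping. Standalone:

table_cols = ["action", "min", "max"]
data = ["authenticate", 0.251, 1.078]
print(dict(zip(table_cols, data)))
# {'action': 'authenticate', 'min': 0.251, 'max': 1.078}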
Example #8
def solve(A, *B):
    A = asarray(A)
    assert A.ndim == 2
    B = [asarray(b) for b in B]
    assert all(b.shape[0] == A.shape[0] and b.ndim in (1, 2) for b in B)
    S = [
        slice(i, i + b.shape[1]) if b.ndim == 2 else i
        for b, i in zip(B, numpy.cumsum([0] + [b[0].size for b in B[:-1]]))
    ]
    if not isrational(A) or not all(isrational(b) for b in B):
        A = A.astype(float)
        B = numpy.concatenate([b.astype(float).reshape(len(b), -1) for b in B], axis=1)
        Y = numpy.linalg.solve(A, B)
        X = [Y[:, s] for s in S]
    else:
        Ab = numpy.concatenate([A.numer] + [b.numer.reshape(len(b), -1) for b in B], axis=1)
        n = A.shape[1]
        for icol in range(n):
            if not Ab[icol, icol]:
                Ab[icol:] = Ab[icol + numpy.argsort([abs(v) if v else numpy.inf for v in Ab[icol:, icol]])]
            Ab[:icol] = Ab[:icol] * Ab[icol, icol] - Ab[:icol, icol, numpy.newaxis] * Ab[icol, :]
            Ab[icol + 1 :] = Ab[icol + 1 :] * Ab[icol, icol] - Ab[icol + 1 :, icol, numpy.newaxis] * Ab[icol, :]
        if Ab[n:].any():
            raise numpy.linalg.LinAlgError("linear system has no solution")
        w = numpy.diag(Ab[:n, :n])
        denom = gcd(*w)
        numer = Ab[:n, n:] * (denom // w[:, numpy.newaxis])
        X = [Rational(numer[:, s] * A.denom, denom * b.denom) for (s, b) in zip(S, B)]
        assert not any((dot(A, x) - b).numer.any() for (x, b) in zip(X, B))
    if len(B) == 1:
        X, = X
    return X
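How solve() carves the joint solution back into per-b pieces: cumulative column counts of the stacked right-hand sides, zipped with the blocks themselves, yield one index or slice per block (a small sketch of just that step):

import numpy

B = [numpy.zeros(4), numpy.zeros((4, 2))]  # one vector, one two-column matrix
offsets = [int(i) for i in numpy.cumsum([0] + [b[0].size for b in B[:-1]])]
S = [slice(i, i + b.shape[1]) if b.ndim == 2 else i for b, i in zip(B, offsets)]
print(S)  # [0, slice(1, 3, None)]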
Example #9
    def add_lines(self, levels, colors, linewidths, erase=True):
        """
        Draw lines on the colorbar.

        *colors* and *linewidths* must be scalars or
        sequences the same length as *levels*.

        Set *erase* to False to add lines without first
        removing any previously added lines.
        """
        y = self._locate(levels)
        igood = (y < 1.001) & (y > -0.001)
        y = y[igood]
        if cbook.iterable(colors):
            colors = np.asarray(colors)[igood]
        if cbook.iterable(linewidths):
            linewidths = np.asarray(linewidths)[igood]
        N = len(y)
        x = np.array([0.0, 1.0])
        X, Y = np.meshgrid(x, y)
        if self.orientation == "vertical":
            xy = [list(zip(X[i], Y[i])) for i in range(N)]
        else:
            xy = [list(zip(Y[i], X[i])) for i in range(N)]
        col = collections.LineCollection(xy, linewidths=linewidths)

        if erase and self.lines:
            for lc in self.lines:
                lc.remove()
            self.lines = []
        self.lines.append(col)
        col.set_color(colors)
        self.ax.add_collection(col)
Example #10
def shuffleMerge0(keys1, values1, keys2, values2):
    # this version uses list.sort (and it's usually slower)
    import bisect

    lastKey1 = keys1[-1]
    lastKey2 = keys2[-1]
    swap = lastKey1 > lastKey2
    if swap:
        # pr  "swapping", lastKey1, lastKey2
        (keys1, values1, keys2, values2, lastKey1, lastKey2) = (keys2, values2, keys1, values1, lastKey2, lastKey1)
    # now lastKey1 is in range of keys2: truncate keys2
    # merge all of keys1/values1
    part1 = list(zip(keys1, values1))
    # pr  "part1", part1
    # merge part of keys2, values2
    endindex = bisect.bisect(keys2, lastKey1)
    # pr  "endindex", endindex, len(keys2), lastKey1, keys2
    part2 = list(zip(keys2[:endindex], values2[:endindex]))
    # pr  "part2", part2
    merged = part1 + part2
    merged.sort()
    # pr  "merged", merged
    mergedKeys = [k for (k, v) in merged]
    mergedValues = [v for (k, v) in merged]
    keys1 = None
    values1 = None
    keys2 = keys2[endindex:]
    values2 = values2[endindex:]
    # pr  "before swap", (keys1, values1, keys2, values2)
    if swap:
        # pr  "unswapping"
        (keys1, values1, keys2, values2) = (keys2, values2, keys1, values1)
    # pr  "after swap", (keys1, values1, keys2, values2)
    return (mergedKeys, mergedValues, keys1, values1, keys2, values2)
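The merge step in miniature: zip the key/value lists into pairs, concatenate, sort, then split back out (list() around zip() keeps it Python 3 safe):

keys1, values1 = [1, 4], ["a", "d"]
keys2, values2 = [2, 3], ["b", "c"]
merged = sorted(list(zip(keys1, values1)) + list(zip(keys2, values2)))
print([k for k, v in merged])  # [1, 2, 3, 4]
print([v for k, v in merged])  # ['a', 'b', 'c', 'd']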
Example #11
def _mle(N, M, training_outputs, training_states, pseudo_initial, pseudo_transition, pseudo_emission):
    # p_initial is the probability that a sequence of states starts
    # off with a particular one.
    p_initial = numpy.zeros(N)
    if pseudo_initial:
        p_initial = p_initial + pseudo_initial
    for states in training_states:
        p_initial[states[0]] += 1
    p_initial = _normalize(p_initial)

    # p_transition is the probability that a state leads to the next
    # one.  C(i,j)/C(i) where i and j are states.
    p_transition = numpy.zeros((N, N))
    if pseudo_transition:
        p_transition = p_transition + pseudo_transition
    for states in training_states:
        for n in range(len(states) - 1):
            i, j = states[n], states[n + 1]
            p_transition[i, j] += 1
    for i in range(len(p_transition)):
        p_transition[i, :] = p_transition[i, :] / sum(p_transition[i, :])

    # p_emission is the probability of an output given a state.
    # C(s,o)|C(s) where o is an output and s is a state.
    p_emission = numpy.ones((N, M))  # start all counts at 1 so no emission gets zero probability
    if pseudo_emission:
        p_emission = p_emission + pseudo_emission
    for outputs, states in zip(training_outputs, training_states):
        for o, s in zip(outputs, states):
            p_emission[s, o] += 1
    for i in range(len(p_emission)):
        p_emission[i, :] = p_emission[i, :] / sum(p_emission[i, :])

    return p_initial, p_transition, p_emission
Example #12
def gf_add(f, g, p, K):
    """
    Add polynomials in ``GF(p)[x]``.

    Examples
    ========

    >>> from sympy.polys.domains import ZZ
    >>> from sympy.polys.galoistools import gf_add

    >>> gf_add([3, 2, 4], [2, 2, 2], 5, ZZ)
    [4, 1]

    """
    if not f:
        return g
    if not g:
        return f

    df = gf_degree(f)
    dg = gf_degree(g)

    if df == dg:
        return gf_strip([(a + b) % p for a, b in zip(f, g)])
    else:
        k = abs(df - dg)

        if df > dg:
            h, f = f[:k], f[k:]
        else:
            h, g = g[:k], g[k:]

        return h + [(a + b) % p for a, b in zip(f, g)]
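The alignment trick isolated: when the operands have different degree, the high-order head of the longer list is split off so that zip() pairs coefficients of equal degree (same numbers as the docstring example, plus an extra leading term):

f, g, p = [1, 3, 2, 4], [2, 2, 2], 5  # x**3 + 3x**2 + 2x + 4  and  2x**2 + 2x + 2
k = len(f) - len(g)
h, f_tail = f[:k], f[k:]
print(h + [(a + b) % p for a, b in zip(f_tail, g)])  # [1, 0, 4, 1]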
Example #13
    def find_substep(target, source, value, step_size, error):
        # recursive end condition
        if step_size < 0.0001:
            # new_source = source[:]
            # substep_shift(new_source, value)
            # return new_source
            return value

        # check plus shift
        source_plus = source[:]
        substep_shift(source_plus, value + step_size)
        p_error = 0
        for vals in zip(target, source_plus):
            p_error += pow(vals[0] - vals[1], 2)

        # check minus shift
        source_minus = source[:]
        substep_shift(source_minus, value - step_size)
        m_error = 0
        for vals in zip(target, source_minus):
            m_error += pow(vals[0] - vals[1], 2)

        # take best out of plus shift, minus shift, and no shift
        if p_error < m_error:
            if p_error < error:
                return find_substep(target, source, value + step_size, step_size / 2, p_error)
            else:
                return find_substep(target, source, value, step_size / 2, error)
        else:
            if m_error < error:
                return find_substep(target, source, value - step_size, step_size / 2, m_error)
            else:
                return find_substep(target, source, value, step_size / 2, error)
Example #14
def PlotNumberOfTags(corpus):
    word_tag_dict = defaultdict(set)

    for (word, tag) in corpus:
        word_tag_dict[word].add(tag)
    # using Counter for efficiency (leaner than FreqDist)
    C = Counter(len(val) for val in word_tag_dict.itervalues())

    pylab.subplot(211)
    pylab.plot(C.keys(), C.values(), "-go", label="Linear Scale")
    pylab.suptitle("Word Ambiguity:")
    pylab.title("Number of Words by Possible Tag Number")
    pylab.box("off")  # for better appearance
    pylab.grid("on")  # for better appearance
    pylab.ylabel("Words With This Number of Tags (Linear)")
    pylab.legend(loc=0)
    # add value tags
    for x, y in zip(C.keys(), C.values()):
        pylab.annotate(str(y), (x, y + 0.5))

    pylab.subplot(212)
    pylab.plot(C.keys(), C.values(), "-bo", label="Logarithmic Scale")
    pylab.yscale("log")  # to make the graph more readable, for the log graph version
    pylab.box("off")  # for better appearance
    pylab.grid("on")  # for better appearance
    pylab.xlabel("Number of Tags per Word")
    pylab.ylabel("Words With This Number of Tags (Log)")
    pylab.legend(loc=0)
    # add value tags
    for x, y in zip(C.keys(), C.values()):
        pylab.annotate(str(y), (x, y + 0.5))

    pylab.show()
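For a dict, zip(d.keys(), d.values()) pairs each key with its own value, i.e. it reproduces d.items(); a standalone check with a small Counter:

from collections import Counter

C = Counter(len(tags) for tags in [{"NN"}, {"NN", "VB"}, {"NN"}])
print(list(zip(C.keys(), C.values())) == list(C.items()))  # True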
Example #15
    def SetAllFieldTypes(self, package, desc_proto, scope):
        """Sets all the descriptor's fields's types.

    This method also sets the containing types on any extensions.

    Args:
      package: The current package of desc_proto.
      desc_proto: The message descriptor to update.
      scope: Enclosing scope of available types.
    """

        package = _PrefixWithDot(package)

        main_desc = self._GetTypeFromScope(package, desc_proto.name, scope)

        if package == ".":
            nested_package = _PrefixWithDot(desc_proto.name)
        else:
            nested_package = ".".join([package, desc_proto.name])

        for field_proto, field_desc in zip(desc_proto.field, main_desc.fields):
            self.SetFieldType(field_proto, field_desc, nested_package, scope)

        for extension_proto, extension_desc in zip(desc_proto.extension, main_desc.extensions):
            extension_desc.containing_type = self._GetTypeFromScope(nested_package, extension_proto.extendee, scope)
            self.SetFieldType(extension_proto, extension_desc, nested_package, scope)

        for nested_type in desc_proto.nested_type:
            self.SetAllFieldTypes(nested_package, nested_type, scope)
Example #16
def hierarchical(keys):
    """
    Iterates over dimension values in keys, taking two sets
    of dimension values at a time to determine whether two
    consecutive dimensions have a one-to-many relationship.
    If they do a mapping between the first and second dimension
    values is returned. Returns a list of n-1 mappings, between
    consecutive dimensions.
    """
    ndims = len(keys[0])
    if ndims <= 1:
        return True
    dim_vals = list(zip(*keys))
    combinations = (zip(*dim_vals[i : i + 2]) for i in range(ndims - 1))
    hierarchies = []
    for combination in combinations:
        hierarchy = True
        store1 = defaultdict(list)
        store2 = defaultdict(list)
        for v1, v2 in combination:
            if v2 not in store2[v1]:
                store2[v1].append(v2)
            previous = store1[v2]
            if previous and previous[0] != v1:
                hierarchy = False
                break
            if v1 not in store1[v2]:
                store1[v2].append(v1)
        hierarchies.append(store2 if hierarchy else {})
    return hierarchies
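The two zips isolated: zip(*keys) transposes key tuples into one column per dimension, and zip over adjacent columns pairs values of consecutive dimensions (illustrative keys):

keys = [("a", 1, "x"), ("a", 2, "y"), ("b", 3, "z")]
dim_vals = list(zip(*keys))  # one tuple per dimension
windows = [list(zip(*dim_vals[i:i + 2])) for i in range(len(keys[0]) - 1)]
print(windows[0])  # [('a', 1), ('a', 2), ('b', 3)]  -- dim 0 vs dim 1
print(windows[1])  # [(1, 'x'), (2, 'y'), (3, 'z')]  -- dim 1 vs dim 2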
Example #17
    def _infer_graph(self, inputs, clusters):
        """Maps input to closest cluster and the score.

    Args:
      inputs: list of input Tensors.
      clusters: Tensor of cluster centers.

    Returns:
      List of tuple, where each value in tuple corresponds to a value in inp.
      The tuple has following three elements:
      all_scores: distance of each input to each cluster center.
      score: distance of each input to closest cluster center.
      cluster_idx: index of cluster center closest to the corresponding input.
    """
        assert isinstance(inputs, list)
        # Pairwise distances are used only by transform(). In all other cases, this
        # sub-graph is not evaluated.
        scores = self._distance_graph(inputs, clusters, self._distance_metric)
        output = []
        if self._distance_metric == COSINE_DISTANCE and not self._clusters_l2_normalized():
            # The cosine distance between normalized vectors x and y is the same as
            # 2 * squared_euclidian_distance. We are using this fact and reusing the
            # nearest_neighbors op.
            # TODO(ands): Support COSINE distance in nearest_neighbors and remove
            # this.
            with ops.colocate_with(clusters):
                clusters = tf.nn.l2_normalize(clusters, dim=1)
        for inp, score in zip(inputs, scores):
            with ops.colocate_with(inp):
                (indices, distances) = gen_clustering_ops.nearest_neighbors(inp, clusters, 1)
                if self._distance_metric == COSINE_DISTANCE:
                    distances *= 0.5
                output.append((score, tf.squeeze(distances), tf.squeeze(indices)))
        return zip(*output)
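The final zip(*output) transposes the list of per-input tuples into parallel streams, matching the docstring's three-element contract; with plain values instead of tensors:

output = [(0.1, 0.2, 0), (0.3, 0.1, 1), (0.5, 0.4, 2)]
scores, distances, indices = zip(*output)
print(scores)   # (0.1, 0.3, 0.5)
print(indices)  # (0, 1, 2)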
Example #18
def gf_sub(f, g, p, K):
    """
    Subtract polynomials in ``GF(p)[x]``.

    Examples
    ========

    >>> from sympy.polys.domains import ZZ
    >>> from sympy.polys.galoistools import gf_sub

    >>> gf_sub([3, 2, 4], [2, 2, 2], 5, ZZ)
    [1, 0, 2]

    """
    if not g:
        return f
    if not f:
        return gf_neg(g, p, K)

    df = gf_degree(f)
    dg = gf_degree(g)

    if df == dg:
        return gf_strip([(a - b) % p for a, b in zip(f, g)])
    else:
        k = abs(df - dg)

        if df > dg:
            h, f = f[:k], f[k:]
        else:
            h, g = gf_neg(g[:k], p, K), g[k:]

        return h + [(a - b) % p for a, b in zip(f, g)]
Example #19
def fetch():
    mongo_client = MongoClient("localhost", PORT)
    mcollin = mongo_client[DB][COLL]
    # mcollout = mongo_client[DB][COLL_OUT]

    for phase, fpkl in zip(
        ["climb", "descend", "cruise", "ground"], [fpkl_climb, fpkl_descend, fpkl_cruise, fpkl_ground]
    ):

        print "process phase: %s.." % phase

        res = mcollin.find({"phase": phase})

        samples = []
        total = res.count()
        count = 0

        for r in res:
            count += 1
            data = np.asarray(r["data"])

            alts = data[:, 3].astype(int).tolist()
            spds = data[:, 4].astype(int).tolist()

            samples.extend(zip(alts, spds))

            sys.stdout.write("Progress : %d of %d   \r" % (count, total))
            sys.stdout.flush()

        print "data fetched, pickleing"
        pickle.dump(samples, open(fpkl, "wb"))

    print "processing data completed"
Example #20
def drawGrid(x1, y1, x2, y2, nx, ny):
    """Draw a rectangular grid of lines
        
    The rectangle has (x1,y1) and (x2,y2) as opposite corners.
    There are (nx,ny) subdivisions along the (x,y)-axis. So the grid
    has (nx+1) * (ny+1) lines. nx=ny=1 draws a rectangle.
    nx=0 draws 1 vertical line (at x1). nx=-1 draws no vertical lines.
    ny=0 draws 1 horizontal line (at y1). ny=-1 draws no horizontal lines.
    """
    GL.glBegin(GL.GL_LINES)
    ix = range(nx + 1)
    if nx == 0:
        jx = [1]
        nx = 1
    else:
        jx = ix[::-1]
    for i, j in zip(ix, jx):
        x = (i * x2 + j * x1) / nx
        GL.glVertex2f(x, y1)
        GL.glVertex2f(x, y2)

    iy = range(ny + 1)
    if ny == 0:
        jy = [1]
        ny = 1
    else:
        jy = iy[::-1]
    for i, j in zip(iy, jy):
        y = (i * y2 + j * y1) / ny
        GL.glVertex2f(x1, y)
        GL.glVertex2f(x2, y)
    GL.glEnd()
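The zip(ix, jx) pairing is what makes the loop a linear interpolation: each index i is paired with its mirror j = n - i, so (i*x2 + j*x1)/n sweeps evenly from x1 to x2:

x1, x2, nx = 0.0, 10.0, 4
ix = range(nx + 1)
jx = list(ix)[::-1]
print([(i * x2 + j * x1) / nx for i, j in zip(ix, jx)])
# [0.0, 2.5, 5.0, 7.5, 10.0]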
Example #21
 def _print_iterations_data(result):
     raw_data = result["data"]["raw"]
     headers = ["iteration", "full duration"]
     float_cols = ["full duration"]
     atomic_actions = []
     for row in raw_data:
         # find first non-error result to get atomic actions names
         if not row["error"] and "atomic_actions" in row:
             atomic_actions = row["atomic_actions"].keys()
     for row in raw_data:
         if row["atomic_actions"]:
             for (c, a) in enumerate(atomic_actions, 1):
                 action = "%(no)i. %(action)s" % {"no": c, "action": a}
                 headers.append(action)
                 float_cols.append(action)
             break
     table_rows = []
     formatters = dict(zip(float_cols, [cliutils.pretty_float_formatter(col, 3) for col in float_cols]))
     for (c, r) in enumerate(raw_data, 1):
         dlist = [c]
         dlist.append(r["duration"])
         if r["atomic_actions"]:
             for action in atomic_actions:
                 dlist.append(r["atomic_actions"].get(action) or 0)
         table_rows.append(rutils.Struct(**dict(zip(headers, dlist))))
     cliutils.print_list(table_rows, fields=headers, formatters=formatters)
     print()
Example #22
    def change_layer_order(self, ax=None):
        if ax is None:
            ax = self.select_subplot()
        set_pdfs = self.settings["{0:d}.{1:d}".format(ax.row, ax.col)]["pdfs"]
        # find current order
        pdfs = []
        layers = []
        for pdf in set_pdfs:
            if "layer" in set_pdfs[pdf]:
                layers.append(set_pdfs[pdf]["layer"])
                pdfs.append(pdf)
        old_layers = [pdf for pdf, layer in sorted(zip(pdfs, layers), key=lambda x: x[1], reverse=True)]
        # get new order (e.g. 1 0 2)
        m = Menu(
            options=old_layers,
            exit_str=None,
            header="Enter a new order for the constraints.\n" + "The current order (top to bottom) is:",
        )
        new_layer_order = m.get_order()
        new_layer_order.reverse()

        # should clear subplot before plotting contours again,
        # but don't want to change other plot elements

        # re-plot pdfs in new order
        for i in new_layer_order:
            self.plot_2d_pdf(ax, ax.pdfs[old_layers[i]], layer=-1)
Example #23
 def setUp(self):
     self.a = np.array(
         list(zip(np.arange(10), np.arange(50, 60), np.arange(100, 110))), dtype=[("a", int), ("b", int), ("c", int)]
     )
     self.b = np.array(
         list(zip(np.arange(5, 15), np.arange(65, 75), np.arange(100, 110))), dtype=[("a", int), ("b", int), ("d", int)]
     )
Example #24
def extractvitalstrings(temp_iunits, data, stopword):
    joinvital, dict_vitals, numval = [], [], 1
    t = re.compile(r"\([A-Z]+\s+([^)(]*)\)*")
    for c, d in zip(temp_iunits, data):  ## repeat this for each iunit.
        d = re.sub(
            r"\(DT\s+.*?\)|\(IN\s+.*?\)|\(TO\s+.*?\)|\(VBP\s+.*?\)|\(MD\s+.*?\)", "", d
        )  ## remove the unimportant words.
        vitalstring = re.compile(r"\([A-Z]+\s+([^\(\)]+?)\)").findall(d)
        vitals = re.compile(r"\(NP\s+((\([^\(]+?\){1}\s*)+)").findall(d)  ##extract related words.
        for i in vitals:
            if i != "":
                st = t.findall(i[0])
                st = [i.lower() for i in st if i.lower() not in stopword]  ## remove stopwords + query words.
                if len(st) > 0 and st not in dict_vitals:
                    if type(st) is list:
                        dict_vitals.append(st)
                    else:
                        dict_vitals.append([st])
        [joinvital.extend(el) for el in dict_vitals]
        for j in vitalstring:
            lower_word = j.lower()
            if lower_word not in stopword and lower_word not in joinvital and j not in dict_vitals:
                if type(j) is list:
                    dict_vitals.append(j)
                else:
                    dict_vitals.append([lower_word])
        dict_vital[numval] = dict(zip(range(1, len(dict_vitals) + 1), dict_vitals))
        numval = numval + 1
        dict_vitals = []
Example #25
    def batch_tag(self, sentences):
        # Write the test corpus to a temporary file
        (fd, test_file) = mkstemp(".txt", "test")
        self.write_test_corpus(sentences, os.fdopen(fd, "w"))

        try:
            # Run mallet on the test file.
            stdout, stderr = call_mallet(
                [
                    self._RUN_CRF,
                    "--model-file",
                    os.path.abspath(self.crf_info.model_filename),
                    "--test-file",
                    test_file,
                ],
                stdout="pipe",
            )

            # Decode the output
            labels = self.parse_mallet_output(stdout)

            # strip __start__ and __end__
            if self.crf_info.add_start_state and self.crf_info.add_end_state:
                labels = [labs[1:-1] for labs in labels]
            elif self.crf_info.add_start_state:
                labels = [labs[1:] for labs in labels]
            elif self.crf_info.add_end_state:
                labels = [labs[:-1] for labs in labels]

            # Combine the labels and the original sentences.
            return [zip(sent, label) for (sent, label) in zip(sentences, labels)]

        finally:
            os.remove(test_file)
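The return line in miniature: the outer zip pairs each sentence with its label sequence, the inner zip pairs tokens with tags (list() added for Python 3; the original is Python 2):

sentences = [["the", "cat"], ["a", "dog"]]
labels = [["DT", "NN"], ["DT", "NN"]]
tagged = [list(zip(sent, labs)) for sent, labs in zip(sentences, labels)]
print(tagged[0])  # [('the', 'DT'), ('cat', 'NN')]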
Example #26
 def setUp(cls):
     cls.a = np.array(
         list(zip(np.arange(10), np.arange(50, 60), np.arange(100, 110))), dtype=[("a", int), ("b", int), ("c", int)]
     )
     cls.b = np.array(
         list(zip(np.arange(10), np.arange(65, 75), np.arange(100, 110))), dtype=[("a", int), ("b", int), ("d", int)]
     )
Example #27
def test_binary_blocks_cutting_plane():
    # testing cutting plane ssvm on easy binary dataset
    # generate graphs explicitly for each example
    for inference_method in get_installed(["dai", "lp", "qpbo", "ad3", "ogm"]):
        print("testing %s" % inference_method)
        X, Y = generate_blocks(n_samples=3)
        crf = GraphCRF(inference_method=inference_method)
        clf = NSlackSSVM(model=crf, max_iter=20, C=100, check_constraints=True, break_on_bad=False, n_jobs=1)
        x1, x2, x3 = X
        y1, y2, y3 = Y
        n_states = len(np.unique(Y))
        # delete some rows to make it more fun
        x1, y1 = x1[:, :-1], y1[:, :-1]
        x2, y2 = x2[:-1], y2[:-1]
        # generate graphs
        X_ = [x1, x2, x3]
        G = [make_grid_edges(x) for x in X_]

        # reshape / flatten x and y
        X_ = [x.reshape(-1, n_states) for x in X_]
        Y = [y.ravel() for y in [y1, y2, y3]]

        X = list(zip(X_, G))  # materialize so fit() and predict() can both iterate

        clf.fit(X, Y)
        Y_pred = clf.predict(X)
        for y, y_pred in zip(Y, Y_pred):
            assert_array_equal(y, y_pred)
Example #28
    def test_two_keys_two_vars(self):
        a = np.array(
            list(zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2), np.arange(50, 60), np.arange(10, 20))),
            dtype=[("k", int), ("a", int), ("b", int), ("c", int)],
        )

        b = np.array(
            list(zip(np.tile([10, 11], 5), np.repeat(np.arange(5), 2), np.arange(65, 75), np.arange(0, 10))),
            dtype=[("k", int), ("a", int), ("b", int), ("c", int)],
        )

        control = np.array(
            [
                (10, 0, 50, 65, 10, 0),
                (11, 0, 51, 66, 11, 1),
                (10, 1, 52, 67, 12, 2),
                (11, 1, 53, 68, 13, 3),
                (10, 2, 54, 69, 14, 4),
                (11, 2, 55, 70, 15, 5),
                (10, 3, 56, 71, 16, 6),
                (11, 3, 57, 72, 17, 7),
                (10, 4, 58, 73, 18, 8),
                (11, 4, 59, 74, 19, 9),
            ],
            dtype=[("k", int), ("a", int), ("b1", int), ("b2", int), ("c1", int), ("c2", int)],
        )
        test = join_by(["a", "k"], a, b, r1postfix="1", r2postfix="2", jointype="inner")
        assert_equal(test.dtype, control.dtype)
        assert_equal(test, control)
Example #29
 def explode0(scene):
     centers = [CCOMB(S1(UKPOL(obj))) for obj in scene]
     scalings = len(centers) * [S([1, 2, 3])([sx, sy, sz])]
     scaledCenters = [UK(APPLY(pair)) for pair in zip(scalings, [MK(p) for p in centers])]
     translVectors = [VECTDIFF((p, q)) for (p, q) in zip(scaledCenters, centers)]
     translations = [T([1, 2, 3])(v) for v in translVectors]
     return STRUCT([t(obj) for (t, obj) in zip(translations, scene)])
Example #30
def check_concat_with_shape(shapes, dimension):
    n = len(shapes)
    # forward
    target_dim = 0
    for shape in shapes:
        target_dim += shape[dimension]

    inputs = [mx.symbol.Variable("arg%d" % i) for i in range(n)]
    out = mx.symbol.Concat(*inputs, name="conc", dim=dimension)
    arr = [mx.nd.empty(shape) for shape in shapes]
    for i in range(n):
        arr[i][:] = shapes[i][dimension]
    arr_np = [np.copy(narray.asnumpy()) for narray in arr]
    arr_grad = [mx.nd.empty(shape) for shape in shapes]
    args = out.list_arguments()
    arg_shapes, out_shapes, aux_shapes = out.infer_shape(**dict(zip(args, shapes)))
    out_grad = mx.nd.empty(out_shapes[0])
    exec1 = out.bind(mx.Context("cpu"), args=arr, args_grad=arr_grad)
    exec1.forward()
    out1 = exec1.outputs[0]
    ret = np.concatenate([narray.asnumpy() for narray in arr], axis=dimension)
    assert same(out1.asnumpy(), ret)
    # backward
    out1.copyto(out_grad)
    out_grad[:] += 1
    exec1.backward([out_grad])
    for grad, np_grad in zip(arr_grad, arr_np):
        assert same(grad.asnumpy(), np_grad + 1)
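dict(zip(names, values)) plus ** turns two parallel lists into keyword arguments, which is how infer_shape() receives one shape per input name; with a stand-in function instead of MXNet:

def infer_shape(**shapes):
    return sorted(shapes.items())

args = ["arg0", "arg1", "arg2"]
shapes = [(2, 3), (2, 5), (2, 1)]
print(infer_shape(**dict(zip(args, shapes))))
# [('arg0', (2, 3)), ('arg1', (2, 5)), ('arg2', (2, 1))]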