import time
import timeit

import psutil


def threaded_function():
    """Sample memory usage in a background thread until `done` is set."""
    global memories
    global done
    # timeit.default_timer() is a monotonic wall-clock timer; timeit.timeit()
    # would instead benchmark an empty statement, and time.timeit() does not exist.
    start = timeit.default_timer()
    while not done:
        # cpus.append(psutil.cpu_percent())
        memories.append(psutil.virtual_memory()._asdict())
        end = timeit.default_timer()
        if end - start > 5:
            time.sleep(5)
            start = timeit.default_timer()
    print("done")
def ap():
    # Fragment: `_time_st` (a dict of start times) and the key `s` come from
    # the enclosing scope; `timeit` is assumed to be timeit.default_timer.
    _time_st[s] = timeit()
    return ()

from numpy import array, linspace, pi, sin
from scipy.special import jv as jn  # Bessel function of the first kind
from timeit import default_timer as timeit


def Esol_series_bessel(M, e, N=2):
    """Truncated Bessel-series approximation of the eccentric anomaly."""
    E = M
    for n in range(1, N + 1):
        E += (2. / n) * jn(n, e) * sin(n * M)
    return E
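
# For reference, the truncated series this function evaluates (as written,
# with $J_n(e)$ as the Bessel-function argument) is
#
# $$E(M, e) \approx M + \sum_{n=1}^{N} \frac{2}{n}\, J_n(e)\, \sin(n M)$$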


# # Comparison of Solutions

# In[256]:

# Esol_numeric, Esol_analytic and Esol_series_fourier are defined earlier in
# the notebook; the timings below are reported in microseconds.
e = 0.6
Ms = linspace(0, 2 * pi, 100)

t1 = timeit()

# Numeric solution
Ens = array([Esol_numeric(M, e) for M in Ms])
t2 = timeit()
print("Numeric:", (t2 - t1) * 1E6)
t1 = t2

# Analytic solution
Eas = array([Esol_analytic(M, e) for M in Ms])
t2 = timeit()
print("Analytic:", (t2 - t1) * 1E6)
t1 = t2

# Fourier series solution
Efs = array([Esol_series_fourier(M, e) for M in Ms])
t2 = timeit()
print("Fourier:", (t2 - t1) * 1E6)
t1 = t2

# Bessel series solution
Ebs = array([Esol_series_bessel(M, e, N=7) for M in Ms])
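
# Quick sanity check (a sketch using the arrays computed above): maximum
# deviation of each approximation from the numeric solution.
print("Max |Eas - Ens|:", abs(Eas - Ens).max())
print("Max |Efs - Ens|:", abs(Efs - Ens).max())
print("Max |Ebs - Ens|:", abs(Ebs - Ens).max())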
Example #5
def wrapper(*args, **kwargs):
    # `func` is closed over by the enclosing decorator; `timeit` is assumed
    # to be timeit.default_timer.
    start = timeit()
    rv = func(*args, **kwargs)
    end = timeit()
    print(end - start)  # elapsed time; the original printed a negative value
    return rv
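
# A sketch of the enclosing decorator this fragment is usually nested in; the
# decorator name `timed` and the timer import are assumptions.
from timeit import default_timer as timeit


def timed(func):
    def wrapper(*args, **kwargs):
        start = timeit()
        rv = func(*args, **kwargs)
        print(timeit() - start)  # elapsed seconds
        return rv
    return wrapper


@timed
def slow_add(a, b):
    return a + b


slow_add(1, 2)  # prints the call duration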
Example #6
def convert(data_dir, output_filepath, append_to_existing=False):
    """
    Converts TransXChange-formatted schedule data into a GTFS feed.

    data_dir : str
        Data directory containing one or multiple TransXchange .xml files.
    output_filepath : str
        Full filepath to the output GTFS zip-file, e.g. '/home/myuser/data/my_gtfs.zip'
    append_to_existing : bool (default is False)
        Flag for appending to existing gtfs-database. This might be useful if you have
        TransXchange .xml files distributed into multiple directories (e.g. separate files for
        train data, tube data and bus data) and you want to merge all those datasets into a single
        GTFS feed.
    """
    # Total start
    tot_start_t = timeit()

    # Filepath for temporary gtfs db
    target_dir = os.path.dirname(output_filepath)
    gtfs_db = os.path.join(target_dir, "gtfs.db")

    # If not appending, remove the previous gtfs-database if it exists
    if not append_to_existing and os.path.exists(gtfs_db):
        os.remove(gtfs_db)

    # NAPTAN stops
    naptan_stops_fp = get_path("naptan_stops")

    # Retrieve all TransXChange files
    files = glob.glob(os.path.join(data_dir, "*.xml"))

    # Iterate over files
    print("Populating database ..")

    # Limit processing by file size (in MB): only files smaller than this
    # limit will be processed
    file_size_limit = 1000

    # Create workers
    workers = create_workers(input_files=files,
                             file_size_limit=file_size_limit,
                             stops_fp=naptan_stops_fp,
                             gtfs_db=gtfs_db)

    # Create Pool
    pool = multiprocessing.Pool()

    # Generate GTFS info to the database in parallel
    pool.map(process_files, workers)

    # Print information about the total time
    tot_end_t = timeit()
    tot_duration = (tot_end_t - tot_start_t) / 60
    print("===========================================================")
    print("It took %s minutes in total." % round(tot_duration, 1))

    # Generate output dictionary
    gtfs_data = generate_gtfs_export(gtfs_db)

    # Export to disk
    save_to_gtfs_zip(output_zip_fp=output_filepath, gtfs_data=gtfs_data)
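
# Usage sketch (paths are illustrative placeholders): convert every
# TransXChange .xml file in a directory into a single GTFS zip.
convert(
    data_dir="/home/myuser/data/transxchange",
    output_filepath="/home/myuser/data/my_gtfs.zip",
    append_to_existing=False,
)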
Example #7
def process_files(parallel):
    # Get files from input instance
    files = parallel.input_files
    file_size_limit = parallel.file_size_limit
    naptan_stops_fp = parallel.stops_fp
    gtfs_db = parallel.gtfs_db

    for idx, fp in enumerate(files):
        # Filesize
        size = round((os.path.getsize(fp) / 1000000), 1)
        if file_size_limit < size:
            continue

        print(
            "================================================================="
        )
        print("[%s / %s] Processing TransXChange file: %s" %
              (idx, len(files), os.path.basename(fp)))
        print("Size: %s MB" % size)
        # Log start time
        start_t = timeit()

        data = untangle.parse(fp)

        # Parse stops
        stop_data = get_stops(data, naptan_stops_fp=naptan_stops_fp)

        # Parse agency
        agency = get_agency(data)

        # Parse GTFS info containing data about trips, calendar, stop_times and calendar_dates
        gtfs_info = get_gtfs_info(data)

        # Parse stop_times
        stop_times = get_stop_times(gtfs_info)

        # Parse trips
        trips = get_trips(gtfs_info)

        # Parse calendar
        calendar = get_calendar(gtfs_info)

        # Parse calendar_dates
        calendar_dates = get_calendar_dates(gtfs_info)

        # Parse routes
        routes = get_routes(gtfs_info=gtfs_info, data=data)

        # Initialize database connection
        conn = sqlite3.connect(gtfs_db)

        # Only export data into db if there exists valid stop_times data
        if len(stop_times) > 0:
            stop_times.to_sql(name='stop_times',
                              con=conn,
                              index=False,
                              if_exists='append')
            stop_data.to_sql(name='stops',
                             con=conn,
                             index=False,
                             if_exists='append')
            routes.to_sql(name='routes',
                          con=conn,
                          index=False,
                          if_exists='append')
            agency.to_sql(name='agency',
                          con=conn,
                          index=False,
                          if_exists='append')
            trips.to_sql(name='trips',
                         con=conn,
                         index=False,
                         if_exists='append')
            calendar.to_sql(name='calendar',
                            con=conn,
                            index=False,
                            if_exists='append')

            if calendar_dates is not None:
                calendar_dates.to_sql(name='calendar_dates',
                                      con=conn,
                                      index=False,
                                      if_exists='append')
        else:
            print(
                "UserWarning: File %s did not contain valid stop_sequence data, skipping."
                % (os.path.basename(fp)))

        # Close connection
        conn.close()

        # Log end time and parse duration
        end_t = timeit()
        duration = (end_t - start_t) / 60

        print("It took %s minutes." % round(duration, 1))
Example #8
def process_files(parallel):
    # Get files from input instance
    files = parallel.input_files
    file_size_limit = parallel.file_size_limit
    gtfs_db = parallel.gtfs_db

    for idx, path in enumerate(files):

        # If the type is a string, it is a direct filepath to an XML file
        if isinstance(path, str):
            data, file_size, xml_name = read_unpacked_xml(path)

        # If the type is a dictionary, the contents are in a ZipFile
        elif isinstance(path, dict):

            # If the value is a string, the file can be read directly from
            # the given ZipFile path, with the following structure:
            # {"transxchange_name.xml" : "/home/data/myzipfile.zip"}
            if isinstance(list(path.values())[0], str):
                data, file_size, xml_name = read_xml_inside_zip(path)

            # If the value is a dictionary, the xml-file is in a ZipFile
            # which is inside another ZipFile. In such cases, the path
            # structure is:
            # {"outermost_zipfile_path.zip": {"inner_zipfile.zip": "transxchange.xml"}}
            elif isinstance(list(path.values())[0], dict):
                data, file_size, xml_name = read_xml_inside_nested_zip(path)
            else:
                raise ValueError(
                    "Something is wrong with the input xml-file paths.")
        else:
            raise ValueError(
                "Something is wrong with the input xml-file paths.")

        # Filesize
        size = round((file_size / 1000000), 1)
        if file_size_limit < size:
            continue

        print(
            "================================================================="
        )
        print("[%s / %s] Processing TransXChange file: %s" %
              (idx, len(files), xml_name))
        print("Size: %s MB" % size)
        # Log start time
        start_t = timeit()

        # Parse stops
        stop_data = get_stops(data)

        if stop_data is None:
            print("Did not found any valid stops. Skipping..")
            continue

        # Parse agency
        agency = get_agency(data)

        # Parse GTFS info containing data about trips, calendar, stop_times and calendar_dates
        gtfs_info = get_gtfs_info(data)

        # Parse stop_times
        stop_times = get_stop_times(gtfs_info)

        # Parse trips
        trips = get_trips(gtfs_info)

        # Parse calendar
        calendar = get_calendar(gtfs_info)

        # Parse calendar_dates
        calendar_dates = get_calendar_dates(gtfs_info)

        # Parse routes
        routes = get_routes(gtfs_info=gtfs_info, data=data)

        # Initialize database connection
        conn = sqlite3.connect(gtfs_db)

        # Only export data into db if there exists valid stop_times data
        if len(stop_times) > 0:
            stop_times.to_sql(name='stop_times',
                              con=conn,
                              index=False,
                              if_exists='append')
            stop_data.to_sql(name='stops',
                             con=conn,
                             index=False,
                             if_exists='append')
            routes.to_sql(name='routes',
                          con=conn,
                          index=False,
                          if_exists='append')
            agency.to_sql(name='agency',
                          con=conn,
                          index=False,
                          if_exists='append')
            trips.to_sql(name='trips',
                         con=conn,
                         index=False,
                         if_exists='append')
            calendar.to_sql(name='calendar',
                            con=conn,
                            index=False,
                            if_exists='append')

            if calendar_dates is not None:
                calendar_dates.to_sql(name='calendar_dates',
                                      con=conn,
                                      index=False,
                                      if_exists='append')
        else:
            print(
                "UserWarning: File %s did not contain valid stop_sequence data, skipping."
                % (xml_name))

        # Close connection
        conn.close()

        # Log end time and parse duration
        end_t = timeit()
        duration = (end_t - start_t) / 60

        print("It took %s minutes." % round(duration, 1))
Example #9
def convert(input_filepath,
            output_filepath,
            append_to_existing=False,
            worker_cnt=None,
            file_size_limit=2000):
    """
    Converts TransXChange-formatted schedule data into a GTFS feed.

    input_filepath : str
        File path to a data directory or a ZipFile containing one or multiple TransXChange .xml files.
        Nested ZipFiles are also supported (i.e. a ZipFile containing ZipFile(s) of .xml files).
    output_filepath : str
        Full filepath to the output GTFS zip-file, e.g. '/home/myuser/data/my_gtfs.zip'
    append_to_existing : bool (default is False)
        Flag for appending to existing gtfs-database. This might be useful if you have
        TransXchange .xml files distributed into multiple directories (e.g. separate files for
        train data, tube data and bus data) and you want to merge all those datasets into a single
        GTFS feed.
    worker_cnt : int
        Number of workers to distribute the conversion process. By default the number of CPUs is used.
    file_size_limit : int
        File size limit (in megabytes); can be used to skip larger-than-memory XML files (such files should not normally occur).
    """
    # Total start
    tot_start_t = timeit()

    # Filepath for temporary gtfs db
    target_dir = os.path.dirname(output_filepath)
    gtfs_db = os.path.join(target_dir, "gtfs.db")

    # If not appending, remove the previous gtfs-database if it exists
    if not append_to_existing and os.path.exists(gtfs_db):
        os.remove(gtfs_db)

    # Retrieve all TransXChange files
    files = get_xml_paths(input_filepath)

    # Iterate over files
    print("Populating database ..")

    # Create workers
    workers = create_workers(input_files=files,
                             worker_cnt=worker_cnt,
                             file_size_limit=file_size_limit,
                             gtfs_db=gtfs_db)

    # Create Pool
    pool = multiprocessing.Pool()

    # Generate GTFS info to the database in parallel
    pool.map(process_files, workers)

    # Print information about the total time
    tot_end_t = timeit()
    tot_duration = (tot_end_t - tot_start_t) / 60
    print("===========================================================")
    print("It took %s minutes in total." % round(tot_duration, 1))

    # Generate output dictionary
    gtfs_data = generate_gtfs_export(gtfs_db)

    # Export to disk
    save_to_gtfs_zip(output_zip_fp=output_filepath, gtfs_data=gtfs_data)
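
# Usage sketch for this variant (paths are illustrative placeholders):
# convert a zipped TransXChange archive into a GTFS feed using 4 workers.
convert(
    input_filepath="/home/myuser/data/transxchange.zip",
    output_filepath="/home/myuser/data/my_gtfs.zip",
    append_to_existing=False,
    worker_cnt=4,
    file_size_limit=2000,
)
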
def calibrate(args, jones, alphas):
    # Simple calibration to test whether the simulation went as expected.
    # Note: do not run this on a large data set.

    # load data
    ms = table(args.ms)
    time = ms.getcol('TIME')
    _, tbin_idx, tbin_counts = chunkify_rows(time, args.utimes_per_chunk)
    n_time = tbin_idx.size
    ant1 = ms.getcol('ANTENNA1')
    ant2 = ms.getcol('ANTENNA2')
    n_ant = np.maximum(ant1.max(), ant2.max()) + 1
    uvw = ms.getcol('UVW').astype(np.float64)
    data = ms.getcol(args.out_col)  # this is where we put the data
    # we know it is pure Stokes I so we can solve using diagonals only
    data = data[:, :, (0, 3)].astype(np.complex128)
    n_row, n_freq, n_corr = data.shape
    flag = ms.getcol('FLAG')
    flag = flag[:, :, (0, 3)]

    # get phase dir
    radec0 = table(args.ms + '::FIELD').getcol('PHASE_DIR').squeeze().astype(
        np.float64)

    # get freqs
    freq = table(args.ms + '::SPECTRAL_WINDOW').getcol('CHAN_FREQ')[0].astype(
        np.float64)
    assert freq.size == n_freq

    # now get the model
    # get source coordinates from lsm
    lsm = Tigger.load(args.sky_model)
    radec = []
    stokes = []
    spi = []
    ref_freqs = []

    for source in lsm.sources:
        radec.append([source.pos.ra, source.pos.dec])
        stokes.append([source.flux.I])
        tmp_spec = source.spectrum
        spi.append([tmp_spec.spi if tmp_spec is not None else 0.0])
        ref_freqs.append([tmp_spec.freq0 if tmp_spec is not None else 1.0])

    n_dir = len(stokes)
    radec = np.asarray(radec)
    lm = radec_to_lm(radec, radec0)

    # get model visibilities
    model = np.zeros((n_row, n_freq, n_dir, 2), dtype=np.complex128)
    stokes = np.asarray(stokes)
    ref_freqs = np.asarray(ref_freqs)
    spi = np.asarray(spi)
    for d in range(n_dir):
        Stokes_I = stokes[d] * (freq / ref_freqs[d])**spi[d]
        model[:, :, d, 0:1] = im_to_vis(Stokes_I[None, :, None], uvw,
                                        lm[d:d + 1], freq)
        model[:, :, d, 1] = model[:, :, d, 0]

    # set weights to unity
    weight = np.ones_like(data, dtype=np.float64)

    # initialise gains
    jones0 = np.ones((n_time, n_ant, n_freq, n_dir, n_corr),
                     dtype=np.complex128)

    # calibrate
    ti = timeit()
    jones_hat, jhj, jhr, k = gauss_newton(tbin_idx,
                                          tbin_counts,
                                          ant1,
                                          ant2,
                                          jones0,
                                          data,
                                          flag,
                                          model,
                                          weight,
                                          tol=1e-5,
                                          maxiter=100)
    print("%i iterations took %fs" % (k, timeit() - ti))

    # verify result
    for p in range(2):
        for q in range(p):
            diff_true = np.angle(jones[:, p] * jones[:, q].conj())
            diff_hat = np.angle(jones_hat[:, p] * jones_hat[:, q].conj())
            try:
                assert_array_almost_equal(diff_true, diff_hat, decimal=2)
            except Exception as e:
                print(e)
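
# Illustrative invocation of calibrate: the attribute names mirror what the
# function reads from `args`; paths and the column name are placeholders, and
# `jones`/`alphas` are assumed to come from the preceding simulation step.
from types import SimpleNamespace

args = SimpleNamespace(
    ms="/data/test.ms",
    utimes_per_chunk=32,
    out_col="DATA",
    sky_model="/data/skymodel.lsm.html",
)
calibrate(args, jones, alphas)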