def calculate_distances(self):
        """Annotate each trial site/location with its distance from the patient.

        The patient zip code is taken from ``self.from_source['va']`` when that
        entry is truthy, otherwise from ``self.from_source['cms']``; if neither
        is available the method returns without doing anything.

        For every trial in ``self.trials``:

        * each entry of ``trial.sites`` is geolocated from its
          ``org_coordinates`` (``lat``/``lon``) when present, otherwise from
          the first five characters of ``org_postal_code``;
        * each entry of ``trial.locations`` is geolocated from the first five
          characters of ``LocationZip`` (``"00000"`` when absent).

        When both the patient and the site could be geolocated, the result of
        ``distance(pat_latlong, site_latlong)`` is stored in
        ``site["distance"]``; otherwise the site is left untouched and a debug
        message is logged.

        Returns:
            None. Sites/locations are updated in place.
        """
        db = Zipcode()
        if self.from_source.get('va'):
            patzip = self.from_source['va'].zipcode[:5]
        elif self.from_source.get('cms'):
            patzip = self.from_source['cms'].zipcode[:5]
        else:
            # No source to take a patient zip code from: nothing to compute.
            return
        pat_latlong = db.zip2geo(patzip)
        logging.debug(f"Zipcode {patzip}, pat_latlong: {pat_latlong}")

        logging.debug(f"Checking distances for {len(self.trials)} trials")
        for trial in self.trials:
            if trial.sites is None:
                logging.debug(f"Site list empty for trial {trial.id}")
            else:
                logging.debug(f"Trial {trial.id} has {len(trial.sites)} sites")
                for site in trial.sites:
                    # A key that is present but null must be treated like a
                    # missing key, otherwise coordinates["lat"] raises
                    # TypeError on None.
                    coordinates = site.get("org_coordinates") or 0
                    logging.debug(f"Coordinates: {coordinates}")
                    if coordinates == 0:
                        # Fall back to a zip-code lookup; null/missing -> ''.
                        zipcode = site.get('org_postal_code') or ''
                        site_latlong = db.zip2geo(zipcode[:5])
                        logging.debug(
                            f"site lat-long (from zip): {site_latlong}")
                    else:
                        site_latlong = (coordinates["lat"], coordinates["lon"])
                        logging.debug(
                            f"site lat-long (from coords): {site_latlong}")
                    if (site_latlong is None) or (pat_latlong is None):
                        # .get keeps a missing org_name from turning a debug
                        # message into a KeyError (mirrors the locations
                        # branch below).
                        logging.debug(
                            f"no distance for site {site.get('org_name', 'unknown')} at trial={trial.id}"
                        )
                    else:
                        site["distance"] = distance(pat_latlong, site_latlong)
                        logging.debug(
                            f"Distance={site['distance']} for Trial={trial.id}"
                        )

            if trial.locations is None:
                logging.debug(f"Location list empty for trial {trial.id}")
            else:
                logging.debug(
                    f"Trial {trial.id} has {len(trial.locations)} locations")
                for site in trial.locations:
                    # `or` also covers LocationZip being present but None,
                    # which the plain .get default would not.
                    location_zip = site.get("LocationZip") or "00000"
                    site_latlong = db.zip2geo(location_zip[:5])
                    logging.debug(f"site lat-long (from zip): {site_latlong}")
                    if (site_latlong is None) or (pat_latlong is None):
                        logging.debug(
                            f"no distance for site {site.get('LocationFacility', 'unknown')} at trial={trial.id}"
                        )
                    else:
                        site["distance"] = distance(pat_latlong, site_latlong)
                        logging.debug(
                            f"Distance={site['distance']} for Trial={trial.id}"
                        )
예제 #2
0
def find_similarity_of_points_in_radius(closest_vantage_pt, ts1, radius):
    """
    Given a vantage point and a radius, find the points that fall within the
    circle around the vantage point. Then calculates the distance from all of
    these points to the timeseries of interest.

    closest_vantage_pt: number of the vantage point being considered
    ts1: timeseries of interest
    radius: radius of circle to consider

    Returns: list of [distance, "Timeseries<id>"] pairs, in the order the
    database lookup returned them with the vantage point itself last.
    The list is NOT sorted; callers are expected to sort by distance.
    """
    # open database for that vantage point
    db = BinarySearchDatabase.connect("VantagePointDatabases/" +
                                      str(closest_vantage_pt) + ".dbdb")
    try:
        # find all light curves whose stored key is below the radius
        light_curves_in_radius = db.get_nodes_less_than(radius)
        light_curves_in_radius.append(
            str(closest_vantage_pt))  # add in the vantage pt
    finally:
        # always release the database handle, even if the lookup fails
        db.close()

    # The standardized query series does not change between iterations, so
    # compute it once (assumes distances.distance does not mutate its inputs).
    ts1_stand = distances.stand(ts1, ts1.mean(), ts1.std())

    # find similarity between these light curves and the given light curve
    results = []
    for curve_id in light_curves_in_radius:
        # NOTE: pickle.load must only be used on trusted, locally generated
        # files.
        with open("GeneratedTimeseries/Timeseries" + str(curve_id), "rb") as f:
            ts2 = pickle.load(f)
        dist = distances.distance(ts1_stand,
                                  distances.stand(ts2, ts2.mean(), ts2.std()),
                                  mult=1)
        results.append([dist, "Timeseries" + str(curve_id)])
    return results
예제 #3
0
def sanity_check(filename, n):
    """
    Brute-force reference for the vantage point method: compares the given
    timeseries against every stored timeseries (ids 0-999) and ranks them by
    distance.

    filename: path of the pickled timeseries of interest
    n: number of neighbours to return

    Returns: list of the n most similar "Timeseries<id>" names
    """
    with open(filename, "rb") as handle:
        ts1 = pickle.load(handle)

    scored = []
    for idx in range(1000):
        with open("GeneratedTimeseries/Timeseries" + str(idx), "rb") as handle:
            ts2 = pickle.load(handle)
        query_stand = distances.stand(ts1, ts1.mean(), ts1.std())
        other_stand = distances.stand(ts2, ts2.mean(), ts2.std())
        gap = distances.distance(query_stand, other_stand, mult=1)
        scored.append([gap, "Timeseries" + str(idx)])

    scored.sort(key=lambda pair: pair[0])

    # Rank 0 is skipped -- presumably it is the query series itself, which is
    # expected to be one of the stored series (TODO confirm with callers).
    return [scored[rank][1] for rank in range(1, n + 1)]
예제 #4
0
    def __init__(self, lambdaa=1, rail=False):
        """Build the VDC/dealer graph from the locations returned by readData().

        lambdaa: exponent applied to every VDC-to-VDC distance before it is
                 used as an edge weight (dealer edges use the raw distance)
        rail: stored on the instance as-is; not otherwise used here
        """
        self.rail = rail
        self.locdict = readData()

        # Partition the locations into VDCs and dealers.
        vdcs, dealers = {}, {}
        for name, loc in self.locdict.items():
            if loc.isVDC():
                vdcs[name] = loc
            else:
                dealers[name] = loc
        self.vdcDict = vdcs
        self.dealerDict = dealers

        # Start with a graph that contains only the VDCs ...
        self.G = nx.Graph()
        self.G.add_nodes_from(self.vdcDict)

        # ... connected pairwise, weighted by distance ** lambdaa.
        for src_name, src in self.vdcDict.items():
            for dst_name, dst in self.vdcDict.items():
                edge_weight = distance(src, dst)**lambdaa
                self.G.add_edge(src_name, dst_name, weight=edge_weight)

        # Shortest paths are computed before dealers are added, so they cover
        # VDC-to-VDC routes only.
        self.vdcPaths = dict(nx.all_pairs_dijkstra_path(self.G))
        self.vdcPathLengths = dict(nx.all_pairs_dijkstra_path_length(self.G))

        # Now hang every dealer off its nearest VDC.
        self.G.add_nodes_from(self.dealerDict)
        for dealer in self.dealerDict.values():
            nearestvdc = min(self.vdcDict.values(),
                             key=lambda candidate: distance(dealer, candidate))
            # Record the pairing on both objects.
            dealer.setVDC(nearestvdc)
            nearestvdc.addDealer(dealer)

            # Dealer edges are weighted by the plain distance (no exponent).
            self.G.add_edge(dealer.getName(),
                            nearestvdc.getName(),
                            weight=distance(dealer, nearestvdc))
예제 #5
0
파일: dtf.py 프로젝트: raviyer/warbirds
def _test(inf):
    """Print a text entry for every Line parsed from `inf`, then the total miles.

    NOTE(review): this block uses Python 2 print statements and will not
    compile under Python 3.
    """
    lines = dtf_parse(inf)
    # Running total of the per-line mileage.
    t = 0
    for l in lines:
        # Only Line instances contribute; anything else from dtf_parse is skipped.
        if isinstance(l, Line):
            m = distances.miles(distances.distance(l.start, l.end))
            t += m
            # One entry per line, built from the midpoint plus a heading/mileage
            # label. The trailing comma suppresses the newline (Python 2).
            print dtf_text(distances.midpoint(l.start, l.end),
                           "Head %d for %0.2f miles" % (distances.heading(l.start,
                                                                          l.end),
                                                        m)),
    print "\nTotal of %f miles\n" % t
예제 #6
0
def find_most_similiar(filename, n, vantage_pts):
    """
    Vantage-point search for the n timeseries closest to the one stored in
    `filename`.

    filename: path of the pickled timeseries of interest
    n: number of similar timeseries to return (n must be between 1 and 20)
    vantage_pts: a list of the vantage point numbers

    Returns: list of the n most similar "Timeseries<id>" names
    """
    # load the timeseries of interest
    with open(filename, "rb") as handle:
        ts1 = pickle.load(handle)

    # rank every vantage point by its distance to the query
    ranked_vantage = []
    for vp in vantage_pts:
        with open("GeneratedTimeseries/Timeseries" + str(vp), "rb") as handle:
            ts2 = pickle.load(handle)
        query_stand = distances.stand(ts1, ts1.mean(), ts1.std())
        vp_stand = distances.stand(ts2, ts2.mean(), ts2.std())
        ranked_vantage.append(
            [distances.distance(query_stand, vp_stand, mult=1), vp])
    ranked_vantage.sort(key=lambda pair: pair[0])

    # gather the candidates around each of the n closest vantage points,
    # searching a circle of twice the query-to-vantage distance
    candidates = []
    for rank in range(n):
        vp_entry = ranked_vantage[rank]
        nearby = find_similarity_of_points_in_radius(vp_entry[1], ts1,
                                                     2 * vp_entry[0])
        for entry in nearby:
            if entry in candidates:
                continue
            candidates.append(entry)

    candidates.sort(key=lambda pair: pair[0])

    # position 0 is skipped: it is taken to be the query timeseries itself
    return [candidates[pos][1] for pos in range(1, n + 1)]
from csvreader import *
import networkx as nx
from distances import distance
import matplotlib.pyplot as plt

#locdict = readData()

# Hard-coded sample cities, used here instead of readData().
locdict = {
    "Los Angeles": Location("Los Angeles", 34.05, -118.25),
    "New York": Location("New York", 40.7128, -74.0060),
    "London": Location("London", 51.5074, -0.1278),
    "Tokyo": Location("Tokyo", 35.6895, 139.6917),
}

# One node per city.
G = nx.Graph()
G.add_nodes_from(locdict)

# Connect every ordered pair of cities (including a city with itself),
# weighting each edge by the distance between the two locations.
for loc1 in locdict:
    for loc2 in locdict:
        pair_weight = distance(locdict[loc1], locdict[loc2])
        G.add_edge(loc1, loc2, weight=pair_weight)

print(G.edges.data('weight'))
'''
plt.subplot(121)
nx.draw(G, with_labels=True, font_weight='bold')
plt.show()
'''
예제 #8
0
def pick_vantage_points(arg):
    """
    Code which picks random vantage points and produces a fresh database for
    each one. The database stores (key,value) pairs where:
    key = distance from timeseries to vantage point (kernel coefficient)
    value = id of timeseries (0-999)

    The chosen vantage points are also written, one per line, to
    'VantagePointDatabases/vp'.

    arg: either the number of vantage points (int), or an argv-style list of
         strings such as ['--n', '20'] (20 is used when --n is omitted)

    returns: list of vantage points (integers from 0-999)
    """
    try:
        parser = argparse.ArgumentParser(description="vantage points")
        parser.add_argument('--n',
                            help='number of vantage points',
                            type=int,
                            default=20)
        num = parser.parse_args(arg).n
    except TypeError:
        # arg is not an iterable of strings (e.g. a plain int): use it as the
        # count directly. Only TypeError is caught so that argparse's own
        # SystemExit on malformed arguments is no longer swallowed.
        num = arg

    # Start from an empty database directory on every run.
    db_dir = 'VantagePointDatabases'
    if os.path.isdir(db_dir):
        shutil.rmtree(db_dir)
    os.mkdir(db_dir)

    vantage_pts = random.sample(range(0, 1000), num)

    for vantage_point in vantage_pts:
        # NOTE: pickle.load must only be used on trusted, locally generated
        # files.
        with open("GeneratedTimeseries/Timeseries" + str(vantage_point),
                  "rb") as f:
            ts2 = pickle.load(f)
        # The standardized vantage series is the same for all comparisons, so
        # compute it once (assumes distances.distance does not mutate inputs).
        ts2_stand = distances.stand(ts2, ts2.mean(), ts2.std())

        # The directory was just recreated, so no stale .dbdb file can exist.
        db1 = BinarySearchDatabase.connect("VantagePointDatabases/" +
                                           str(vantage_point) + ".dbdb")
        try:
            for i in range(1000):
                if i == vantage_point:
                    continue
                with open("GeneratedTimeseries/Timeseries" + str(i),
                          "rb") as f:
                    ts1 = pickle.load(f)
                dist = distances.distance(
                    distances.stand(ts1, ts1.mean(), ts1.std()),
                    ts2_stand,
                    mult=1)
                db1.set(dist, str(i))
            db1.commit()
        finally:
            # release the handle even if a comparison or the commit fails
            db1.close()

    # Write the index of vantage points once, after all databases are built
    # (also produces an empty index when no vantage points were requested).
    with open('VantagePointDatabases/vp', 'w') as f:
        f.writelines(str(i) + "\n" for i in vantage_pts)

    return vantage_pts
예제 #9
0
 def test_distance(self):
     """Two identical timeseries, once standardized, are at distance 0."""
     def standardized_sample():
         # Fresh series each call so the two operands are distinct objects.
         series = ts(times=[0, 1, 2, 4, 5, 6], values=[3, 4, 5, 6, 7, 8])
         return distances.stand(series, series.mean(), series.std())

     first = standardized_sample()
     second = standardized_sample()
     assert distances.distance(first, second) == 0
예제 #10
0
 def setVDC(self, vdc):
     """Attach `vdc` to this object and cache the distance between the two."""
     self.vdc = vdc
     separation = distance(self, vdc)
     self.vdcDist = separation
예제 #11
0
 def __init__(self, name, lat, lon, vdc=None):
     """Create the location and optionally attach it to a VDC.

     name, lat, lon: forwarded unchanged to Location.__init__
     vdc: the VDC this object belongs to, or None when not yet assigned
     """
     Location.__init__(self, name, lat, lon)
     self.vdc = vdc
     # Always define vdcDist so that reading it before a VDC has been
     # assigned yields None instead of raising AttributeError.
     self.vdcDist = distance(self, vdc) if vdc is not None else None
예제 #12
0
def find_most_similiar(filename, n, vantage_pts, isfile=True, dbtype='bstree'):
    """
    Finds the n most similar time series to the time series of interest
    by using the supplied vantage points

    filename: timeseries of interest -- a path to a pickled timeseries when
              isfile is True, otherwise the timeseries object itself
    n: number of similar timeseries to return (must be between 1 and the
       number of vantage points)
    vantage_pts: a list of the vantage point numbers
    isfile: whether `filename` is a path (True) or a timeseries object
    dbtype: passed through to find_similarity_of_points_in_radius

    Returns: list of at most n candidate entries, closest first, exactly as
    produced by find_similarity_of_points_in_radius; or one of the strings
    'ERROR INDEX' (file could not be loaded), 'ERROR TYPE' (not a timeseries)
    or 'ERROR NUMBER | <number of vantage points>' (n out of range)
    """
    # load the given file
    if isfile:
        try:
            # NOTE: pickle.load executes arbitrary code from the file; only
            # use this with trusted paths.
            with open(filename, "rb") as f:
                ts1 = pickle.load(f)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            print(
                'Requested %s cannot be found in database, returning ERROR INDEX'
                % (filename, ))
            return 'ERROR INDEX'
    else:
        ts1 = filename

    ## check data type
    if not isinstance(ts1, ts):
        # (filename, ) keeps the formatting correct even if a tuple is passed
        print(
            'Requested %s is not a TimeSeries instance, returning ERROR TYPE' %
            (filename, ))
        return 'ERROR TYPE'

    ## validate n up front, before any vantage point file is read
    num_vantage = len(vantage_pts)
    if n > num_vantage:
        print('More neighbours than vantage requested.')
        return 'ERROR NUMBER | {}'.format(num_vantage)
    if n < 1:
        print('At least one neighbour must be requested.')
        return 'ERROR NUMBER | {}'.format(num_vantage)

    # rank the vantage points by distance to the input timeseries
    vantage_pts_dist = []
    for i in vantage_pts:
        with open("GeneratedTimeseries/Timeseries" + str(i), "rb") as f:
            ts2 = pickle.load(f)

        ## interpolate the timeseries in the database to have the same times
        ## as the client input timeseries
        ts2 = interpolate_to_match_input(ts2, ts1)

        dist = distances.distance(distances.stand(ts1, ts1.mean(), ts1.std()),
                                  distances.stand(ts2, ts2.mean(), ts2.std()),
                                  mult=1)
        vantage_pts_dist.append([dist, i])
    vantage_pts_dist.sort(key=lambda x: x[0])

    # collect the candidates around each of the n closest vantage points,
    # searching a circle of twice the input-to-vantage distance
    all_pts_to_check = []
    for i in range(n):
        closest_vantage_pt = vantage_pts_dist[i][1]
        radius = 2 * vantage_pts_dist[i][0]
        pts_in_radius = find_similarity_of_points_in_radius(
            closest_vantage_pt, ts1, radius, dbtype)
        for j in pts_in_radius:
            if j not in all_pts_to_check:
                all_pts_to_check.append(j)

    all_pts_to_check.sort(key=lambda x: x[0])

    # The closest candidate is kept (the input need not be in the database).
    # Slicing returns fewer than n entries instead of raising IndexError when
    # the search found fewer than n candidates.
    return all_pts_to_check[:n]
예제 #13
0
from location import *
from distances import distance
import pandas as pd
import numpy as np
from math import sin, cos, radians, sqrt, asin

# Two sample cities for a quick manual check of distance().
locdict = {
    "Los Angeles": Location("Los Angeles", 34.05, -118.25),
    "New York": Location("New York", 40.7128, -74.0060),
}

# Show both locations, then their distance scaled down by a factor of 1.2.
print(locdict["Los Angeles"])
print(locdict["New York"])
print(distance(locdict["Los Angeles"], locdict["New York"]) / 1.2)