Esempio n. 1
0
def populateDb():
  """Load the small-body data files into the ``asterank.asteroids`` collection.

  Connects to MongoDB on localhost:27017, makes sure the lookup indexes
  exist, and then:

  1. reads MASS_PATH (space-separated; the second-to-last field is the mass,
     everything before it is the object name) and averages repeated names,
  2. reads DV_PATH (one ``designation,delta-v`` pair per line),
  3. walks the CSV at DATA_PATH, fills in ``spec_B`` where it is empty and a
     fallback applies, attaches delta-v and mass, computes price / saved /
     closeness / profit through the ``scoring`` module, derives ``score``
     and upserts the row keyed on ``full_name``.

  Rows whose ``spec_B`` is empty and for which no fallback applies are
  skipped.  Returns nothing; progress is printed every 3000 upserted rows.
  """
  conn = Connection('localhost', 27017)
  db = conn.asterank
  coll = db.asteroids
  #coll.drop()
  coll.ensure_index('full_name', unique=True, background=True)
  coll.ensure_index('score', background=True)
  coll.ensure_index('profit', background=True)
  coll.ensure_index('prov_des', background=True)  # necessary for upcoming pass lookups
  coll.ensure_index('closeness', background=True)
  coll.ensure_index('price', background=True)

  # load mass data
  print('Loading mass data...')
  mass_samples = {}
  with open(MASS_PATH, 'r') as mass_file:
    for line in mass_file:
      parts = line.split(' ')
      massidx = len(parts) - 2
      mass = float(parts[massidx])
      name = ' '.join(parts[:massidx]).strip()
      mass_samples.setdefault(name, []).append(mass)

  # Average repeated values per object.  Lines that yield an empty name are
  # dropped; skipping them here (rather than `del massd['']`) also avoids a
  # KeyError when the file happens to contain no such line.
  massd = {}
  for name, masses in mass_samples.items():
    if name:
      massd[name] = sum(masses) / len(masses)

  # load delta v data
  print('Loading delta-v data...')
  deltav_map = {}
  with open(DV_PATH, 'r') as dv_file:
    for line in dv_file:
      parts = line.split(',')
      deltav_map[parts[0]] = float(parts[1])

  print('Loading small body data...this may take a while')
  print(DATA_PATH)
  # Captures the text inside the last pair of parentheses in full_name.
  designation_regex = re.compile(r'.*\(([^\)]*)\)')
  n = 0
  with open(DATA_PATH) as data_file:
    reader = csv.DictReader(data_file, delimiter=',', quotechar='"')
    for row in reader:
      if row['spec_B'] == '':
        newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
        if newspec:
          # TODO should have our own merged spec row, instead we overwrite spec_B
          row['spec_B'] = newspec.strip()
        elif row['pdes'] == '2012 DA14':
          print('Adjust 2012 DA14')
          row['spec_B'] = 'L'
        elif row['class'] in COMET_CLASSES:
          row['spec_B'] = 'comet'
        else:
          continue # TODO temp
          #row['spec_B'] = 'S'
        # NOTE: a hard-coded override for '2008 HU4' used to sit here inside a
        # string literal; it never executed and has been dropped.

      if row['spec_B'] == 'C type':
        row['spec_B'] = 'C'

      # match it with its delta-v
      m = designation_regex.match(row['full_name'])
      if 'pdes' in row and 'prov_des' not in row:
        row['prov_des'] = row['pdes']  # backwards compatibility for NASA change
      if m:
        row['prov_des'] = m.group(1)
        dv = deltav_map.get(row['prov_des'], None)
        if dv:
          row['dv'] = dv
      else:
        row['prov_des'] = ''

      # Clean up inputs: numeric-looking fields become floats, other strings
      # are trimmed.
      for key, val in list(row.items()):
        try:
          fv = float(val)
        # The tuple is required: `except ValueError, TypeError` only caught
        # ValueError and bound it to the name TypeError.
        except (ValueError, TypeError):
          # Non-string values (e.g. None for a short CSV row) are left as-is.
          if hasattr(val, 'strip'):
            row[key] = val.strip()
        else:
          row[key] = fv
      row['spec_T'] = row['spec_T'].replace(':', '')
      row['spec_B'] = row['spec_B'].replace(':', '')

      # match mass
      if row['full_name'] in massd:
        row['GM'] = massd[row['full_name']] * G

      # compute score
      row['price'], row['saved'] = scoring.price(row)
      # assume the cost of mining a distant asteroid per kg won't be much better
      # than cost to launch from earth
      # ie., 99.999% of revenue is spent on operations
      row['saved'] = row['saved'] * 0.00001
      row['closeness'] = scoring.closeness_weight(row)
      row['profit'] = scoring.profit(row)

      # TODO move this into scoring once I get it right
      score = min(row['price'], 1e14) / 5e12
      if score >= 0.0001:
        # Below the threshold it's worthless, so closeness doesn't matter
        score = score * row['closeness']**3
      row['score'] = score

      coll.update({'full_name': row['full_name']}, {'$set': row}, True)  # upsert
      n += 1
      if n % 3000 == 0:
        print('%d ...' % n)
Esempio n. 2
0
def _run(partial=False):
    """Drop and rebuild the ``asterank.asteroids`` MongoDB collection.

    Connects to MongoDB on localhost:27017, drops the collection, recreates
    its indexes, and then:

    1. reads MASS_PATH (space-separated; the second-to-last field is the
       mass, everything before it is the object name) and averages repeated
       names,
    2. reads DV_PATH as a CSV with ``pdes`` and ``dv`` columns,
    3. walks the CSV at DATA_PATH, derives a ``spec`` field, attaches delta-v
       and mass, computes price / saved / closeness / profit through the
       ``scoring`` module, derives ``score`` and bulk-inserts the rows in
       batches.

    Args:
        partial: when true, rows with an empty ``spec_B`` and no applicable
            fallback are skipped; otherwise they are stored with spec '?'.

    Returns nothing.
    """
    # Constants and settings
    batch_limit = 20000  # a batch is written once it grows beyond this size

    # Fill database
    conn = Connection('localhost', 27017)
    db = conn.asterank
    coll = db.asteroids
    print('Dropping asteroids (SBDB) collection...')
    coll.drop()
    coll.ensure_index('full_name', unique=True, background=True)
    coll.ensure_index('score', background=True)
    coll.ensure_index('profit', background=True)
    coll.ensure_index('prov_des',
                      background=True)  # necessary for upcoming pass lookups
    coll.ensure_index('closeness', background=True)
    coll.ensure_index('price', background=True)

    def insert_batch(batch, row_count):
        # insert into mongo
        print('Row # %d ... inserting/updating %d items into asteroids (SBDB) collection'
              % (row_count, len(batch)))
        coll.insert(batch, continue_on_error=True)

    # load mass data
    print('Loading mass data...')
    mass_samples = {}
    with open(MASS_PATH, 'r') as mass_file:
        for line in mass_file:
            parts = line.split(' ')
            massidx = len(parts) - 2
            mass = float(parts[massidx])
            name = ' '.join(parts[:massidx]).strip()
            mass_samples.setdefault(name, []).append(mass)

    # Average repeated values per object.  Lines that yield an empty name are
    # dropped; skipping them here (rather than `del massd['']`) also avoids a
    # KeyError when the file happens to contain no such line.
    massd = {}
    for name, masses in mass_samples.items():
        if name:
            massd[name] = sum(masses) / len(masses)

    # load delta v data
    print('Loading delta-v data...')
    deltav_map = {}
    with open(DV_PATH, 'r') as dv_file:
        for dv_row in csv.DictReader(dv_file):
            deltav_map[dv_row['pdes']] = dv_row['dv']

    print('Loading small body data...this may take a while')
    print(DATA_PATH)
    # Captures the text inside the last pair of parentheses in full_name.
    designation_regex = re.compile(r'.*\(([^\)]*)\)')
    n = 0
    items = []
    with open(DATA_PATH) as data_file:
        reader = csv.DictReader(data_file, delimiter=',', quotechar='"')
        for row in reader:
            row['spec'] = row['spec_B']
            row['full_name'] = row['full_name'].strip()
            if row['spec'] == '':
                newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
                if newspec:
                    row['spec'] = newspec.strip()
                # TODO(@ian) move specific adjustments out into its own file.
                elif row['pdes'] == '2012 DA14':
                    print('Adjust 2012 DA14')
                    row['spec'] = 'L'
                elif row['full_name'] == '6178 (1986 DA)':
                    print('Adjust 1986 DA')
                    row['spec'] = 'M'
                elif row['full_name'] == '436724 (2011 UW158)':
                    print('Adjust 2011 UW158')
                    row['spec'] = 'Xc'
                elif row['class'] in COMET_CLASSES:
                    row['spec'] = 'comet'
                else:
                    if partial:
                        # don't build the full db of 600k objects
                        continue
                    row['spec'] = '?'

            if row['spec'] == 'C type':
                row['spec'] = 'C'

            # match it with its delta-v
            # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
            m = designation_regex.match(row['full_name'])
            if 'pdes' in row and 'prov_des' not in row:
                # backwards compatibility for NASA change
                row['prov_des'] = row['pdes']
            if m:
                # Set delta-v first, while prov_des still holds the lookup key
                dv = deltav_map.get(row['prov_des'], None)
                if dv:
                    row['dv'] = dv
                row['prov_des'] = m.group(1)
            else:
                row['prov_des'] = ''

            # Clean up inputs: numeric-looking fields become floats, other
            # strings are trimmed.
            for key, val in list(row.items()):
                try:
                    fv = float(val)
                # The tuple is required: `except ValueError, TypeError` only
                # caught ValueError and bound it to the name TypeError.
                except (ValueError, TypeError):
                    # Non-string values (e.g. None for a short CSV row) are
                    # left as-is.
                    if hasattr(val, 'strip'):
                        row[key] = val.strip()
                else:
                    row[key] = fv
            row['spec_T'] = row['spec_T'].replace(':', '')
            row['spec_B'] = row['spec_B'].replace(':', '')
            row['spec'] = row['spec'].replace(':', '')

            # match mass
            if row['full_name'] in massd:
                row['GM'] = massd[row['full_name']] * G

            # compute score
            row['price'], row['saved'] = scoring.price(row)
            # assume the cost of mining a distant asteroid per kg won't be much better
            # than cost to launch from earth
            # ie., 99.999% of revenue is spent on operations
            row['saved'] = row['saved'] * 0.00001
            row['closeness'] = scoring.closeness_weight(row)
            row['profit'] = scoring.profit(row)

            # TODO move this final scoring pass into scoring.py

            # cap price influence on score at 10 B
            score = min(row['price'], 1e10) / 5e11
            if score > 0.0001:
                score = score + row['closeness'] / 20
            row['score'] = score

            items.append(row)
            n += 1
            if len(items) > batch_limit:
                insert_batch(items, n)
                items = []

    # Flush whatever is left: without this the final partial batch (up to
    # batch_limit rows) would never reach the database.
    if items:
        insert_batch(items, n)
Esempio n. 3
0
def _run(partial=False):
    """Load the small-body data files into the ``asterank.asteroids`` collection.

    Connects to MongoDB on localhost:27017, makes sure the lookup indexes
    exist, and then:

    1. reads MASS_PATH (space-separated; the second-to-last field is the
       mass, everything before it is the object name) and averages repeated
       names,
    2. reads DV_PATH as a CSV with ``pdes`` and ``dv`` columns,
    3. walks the CSV at DATA_PATH, derives a ``spec`` field, attaches delta-v
       and mass, computes price / saved / closeness / profit through the
       ``scoring`` module, derives ``score`` and upserts the row keyed on
       ``full_name``.

    Args:
        partial: when true, rows with an empty ``spec_B`` and no applicable
            fallback are skipped; otherwise they are stored with spec "?".

    Returns nothing; progress is printed every 3000 upserted rows.
    """
    # Constants and settings
    # TODO this method should be factored out

    # Fill database
    conn = Connection("localhost", 27017)
    db = conn.asterank
    coll = db.asteroids
    # coll.drop()
    coll.ensure_index("full_name", unique=True, background=True)
    coll.ensure_index("score", background=True)
    coll.ensure_index("profit", background=True)
    coll.ensure_index("prov_des", background=True)  # necessary for upcoming pass lookups
    coll.ensure_index("closeness", background=True)
    coll.ensure_index("price", background=True)

    # load mass data
    print("Loading mass data...")
    mass_samples = {}
    with open(MASS_PATH, "r") as mass_file:
        for line in mass_file:
            parts = line.split(" ")
            massidx = len(parts) - 2
            mass = float(parts[massidx])
            name = " ".join(parts[:massidx]).strip()
            mass_samples.setdefault(name, []).append(mass)

    # Average repeated values per object.  Lines that yield an empty name are
    # dropped; skipping them here (rather than `del massd[""]`) also avoids a
    # KeyError when the file happens to contain no such line.
    massd = {}
    for name, masses in mass_samples.items():
        if name:
            massd[name] = sum(masses) / len(masses)

    # load delta v data
    print("Loading delta-v data...")
    deltav_map = {}
    with open(DV_PATH, "r") as dv_file:
        for dv_row in csv.DictReader(dv_file):
            deltav_map[dv_row["pdes"]] = dv_row["dv"]

    print("Loading small body data...this may take a while")
    print(DATA_PATH)
    # Captures the text inside the last pair of parentheses in full_name.
    designation_regex = re.compile(r".*\(([^\)]*)\)")
    n = 0
    with open(DATA_PATH) as data_file:
        reader = csv.DictReader(data_file, delimiter=",", quotechar='"')
        for row in reader:
            row["spec"] = row["spec_B"]
            if row["spec"] == "":
                newspec = THOLEN_MAPPINGS.get(row["spec_T"], None)
                if newspec:
                    row["spec"] = newspec.strip()
                elif row["pdes"] == "2012 DA14":
                    print("Adjust 2012 DA14")
                    row["spec"] = "L"
                elif row["class"] in COMET_CLASSES:
                    row["spec"] = "comet"
                else:
                    if partial:
                        continue  # don't build the full db of 600k objects
                    row["spec"] = "?"

            if row["spec"] == "C type":
                row["spec"] = "C"

            # match it with its delta-v
            # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
            m = designation_regex.match(row["full_name"])
            if "pdes" in row and "prov_des" not in row:
                row["prov_des"] = row["pdes"]  # backwards compatibility for NASA change
            if m:
                # Set delta-v first, while prov_des still holds the lookup key
                dv = deltav_map.get(row["prov_des"], None)
                if dv:
                    row["dv"] = dv
                row["prov_des"] = m.group(1)
            else:
                row["prov_des"] = ""

            # Clean up inputs: numeric-looking fields become floats, other
            # strings are trimmed.
            for key, val in list(row.items()):
                try:
                    fv = float(val)
                # The tuple is required: `except ValueError, TypeError` only
                # caught ValueError and bound it to the name TypeError.
                except (ValueError, TypeError):
                    # Non-string values (e.g. None for a short CSV row) are
                    # left as-is.
                    if hasattr(val, "strip"):
                        row[key] = val.strip()
                else:
                    row[key] = fv
            row["spec_T"] = row["spec_T"].replace(":", "")
            row["spec_B"] = row["spec_B"].replace(":", "")
            row["spec"] = row["spec"].replace(":", "")

            # match mass
            if row["full_name"] in massd:
                row["GM"] = massd[row["full_name"]] * G

            # compute score
            row["price"], row["saved"] = scoring.price(row)
            # assume the cost of mining a distant asteroid per kg won't be much better
            # than cost to launch from earth
            # ie., 99.999% of revenue is spent on operations
            row["saved"] = row["saved"] * 0.00001
            row["closeness"] = scoring.closeness_weight(row)
            row["profit"] = scoring.profit(row)

            # TODO move this final scoring pass into scoring.py
            score = min(row["price"], 1e14) / 5e13
            if score >= 0.0001:
                # Below the threshold it's worthless, so closeness doesn't matter
                score = score * row["closeness"]
            row["score"] = score

            coll.update({"full_name": row["full_name"]}, {"$set": row}, True)  # upsert
            n += 1
            if n % 3000 == 0:
                print("%d ..." % n)
Esempio n. 4
0
def _run(partial=False):
  """Drop and rebuild the ``asterank.asteroids`` MongoDB collection.

  Connects to MongoDB on localhost:27017, drops the collection, recreates
  its indexes, and then:

  1. reads MASS_PATH (space-separated; the second-to-last field is the mass,
     everything before it is the object name) and averages repeated names,
  2. reads DV_PATH as a CSV with ``pdes`` and ``dv`` columns,
  3. walks the CSV at DATA_PATH, derives a ``spec`` field, attaches delta-v
     and mass, computes price / saved / closeness / profit through the
     ``scoring`` module, derives ``score`` and bulk-inserts the rows in
     batches.

  Args:
    partial: when true, rows with an empty ``spec_B`` and no applicable
      fallback are skipped; otherwise they are stored with spec '?'.

  Returns nothing.
  """
  # Constants and settings
  batch_limit = 20000  # a batch is written once it grows beyond this size

  # Fill database
  conn = Connection('localhost', 27017)
  db = conn.asterank
  coll = db.asteroids
  print('Dropping asteroids (SBDB) collection...')
  coll.drop()
  coll.ensure_index('full_name', unique=True, background=True)
  coll.ensure_index('score', background=True)
  coll.ensure_index('profit', background=True)
  coll.ensure_index('prov_des', background=True)  # necessary for upcoming pass lookups
  coll.ensure_index('closeness', background=True)
  coll.ensure_index('price', background=True)

  def insert_batch(batch, row_count):
    # insert into mongo
    print('Row # %d ... inserting/updating %d items into asteroids (SBDB) collection'
          % (row_count, len(batch)))
    coll.insert(batch, continue_on_error=True)

  # load mass data
  print('Loading mass data...')
  mass_samples = {}
  with open(MASS_PATH, 'r') as mass_file:
    for line in mass_file:
      parts = line.split(' ')
      massidx = len(parts) - 2
      mass = float(parts[massidx])
      name = ' '.join(parts[:massidx]).strip()
      mass_samples.setdefault(name, []).append(mass)

  # Average repeated values per object.  Lines that yield an empty name are
  # dropped; skipping them here (rather than `del massd['']`) also avoids a
  # KeyError when the file happens to contain no such line.
  massd = {}
  for name, masses in mass_samples.items():
    if name:
      massd[name] = sum(masses) / len(masses)

  # load delta v data
  print('Loading delta-v data...')
  deltav_map = {}
  with open(DV_PATH, 'r') as dv_file:
    for dv_row in csv.DictReader(dv_file):
      deltav_map[dv_row['pdes']] = dv_row['dv']

  print('Loading small body data...this may take a while')
  print(DATA_PATH)
  # Captures the text inside the last pair of parentheses in full_name.
  designation_regex = re.compile(r'.*\(([^\)]*)\)')
  n = 0
  items = []
  with open(DATA_PATH) as data_file:
    reader = csv.DictReader(data_file, delimiter=',', quotechar='"')
    for row in reader:
      row['spec'] = row['spec_B']
      row['full_name'] = row['full_name'].strip()
      if row['spec'] == '':
        newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
        if newspec:
          row['spec'] = newspec.strip()
        # TODO(@ian) move specific adjustments out into its own file.
        elif row['pdes'] == '2012 DA14':
          print('Adjust 2012 DA14')
          row['spec'] = 'L'
        elif row['full_name'] == '6178 (1986 DA)':
          print('Adjust 1986 DA')
          row['spec'] = 'M'
        elif row['full_name'] == '436724 (2011 UW158)':
          print('Adjust 2011 UW158')
          row['spec'] = 'Xc'
        elif row['class'] in COMET_CLASSES:
          row['spec'] = 'comet'
        else:
          if partial:
            # don't build the full db of 600k objects
            continue
          row['spec'] = '?'

      if row['spec'] == 'C type':
        row['spec'] = 'C'

      # match it with its delta-v
      # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
      m = designation_regex.match(row['full_name'])
      if 'pdes' in row and 'prov_des' not in row:
        row['prov_des'] = row['pdes']  # backwards compatibility for NASA change
      if m:
        # Set delta-v first, while prov_des still holds the lookup key
        dv = deltav_map.get(row['prov_des'], None)
        if dv:
          row['dv'] = dv
        row['prov_des'] = m.group(1)
      else:
        row['prov_des'] = ''

      # Clean up inputs: numeric-looking fields become floats, other strings
      # are trimmed.
      for key, val in list(row.items()):
        try:
          fv = float(val)
        # The tuple is required: `except ValueError, TypeError` only caught
        # ValueError and bound it to the name TypeError.
        except (ValueError, TypeError):
          # Non-string values (e.g. None for a short CSV row) are left as-is.
          if hasattr(val, 'strip'):
            row[key] = val.strip()
        else:
          row[key] = fv
      # NOTE: stripping ':' from the spec fields is switched off in this
      # version; it is kept disabled here so stored values do not change.
      # row['spec_T'] = row['spec_T'].replace(':', '')
      # row['spec_B'] = row['spec_B'].replace(':', '')
      # row['spec'] = row['spec'].replace(':', '')

      # match mass
      if row['full_name'] in massd:
        row['GM'] = massd[row['full_name']] * G

      # compute score
      row['price'], row['saved'] = scoring.price(row)
      # assume the cost of mining a distant asteroid per kg won't be much better
      # than cost to launch from earth
      # ie., 99.999% of revenue is spent on operations
      row['saved'] = row['saved'] * 0.00001
      row['closeness'] = scoring.closeness_weight(row)
      row['profit'] = scoring.profit(row)

      # TODO move this final scoring pass into scoring.py

      # cap price influence on score at 10 B
      score = min(row['price'], 1e10) / 5e11
      if score > 0.0001:
        score = score + row['closeness'] / 20
      row['score'] = score

      items.append(row)
      n += 1
      if len(items) > batch_limit:
        insert_batch(items, n)
        items = []

  # Flush whatever is left: without this the final partial batch (up to
  # batch_limit rows) would never reach the database.
  if items:
    insert_batch(items, n)
Esempio n. 5
0
def _run(partial=False):
  """Load the small-body data files into the ``asterank.asteroids`` collection.

  Connects to MongoDB on localhost:27017, makes sure the lookup indexes
  exist, and then:

  1. reads MASS_PATH (space-separated; the second-to-last field is the mass,
     everything before it is the object name) and averages repeated names,
  2. reads DV_PATH as a CSV with ``pdes`` and ``dv`` columns,
  3. walks the CSV at DATA_PATH, derives a ``spec`` field, attaches delta-v
     and mass, computes price / saved / closeness / profit through the
     ``scoring`` module, derives ``score`` and upserts the row keyed on
     ``full_name``.

  Args:
    partial: when true, rows with an empty ``spec_B`` and no applicable
      fallback are skipped; otherwise they are stored with spec '?'.

  Returns nothing; progress is printed every 3000 upserted rows.
  """
  # Constants and settings
  # TODO this method should be factored out

  # Fill database
  conn = Connection('localhost', 27017)
  db = conn.asterank
  coll = db.asteroids
  #coll.drop()
  coll.ensure_index('full_name', unique=True, background=True)
  coll.ensure_index('score', background=True)
  coll.ensure_index('profit', background=True)
  coll.ensure_index('prov_des', background=True)  # necessary for upcoming pass lookups
  coll.ensure_index('closeness', background=True)
  coll.ensure_index('price', background=True)

  # load mass data
  print('Loading mass data...')
  mass_samples = {}
  with open(MASS_PATH, 'r') as mass_file:
    for line in mass_file:
      parts = line.split(' ')
      massidx = len(parts) - 2
      mass = float(parts[massidx])
      name = ' '.join(parts[:massidx]).strip()
      mass_samples.setdefault(name, []).append(mass)

  # Average repeated values per object.  Lines that yield an empty name are
  # dropped; skipping them here (rather than `del massd['']`) also avoids a
  # KeyError when the file happens to contain no such line.
  massd = {}
  for name, masses in mass_samples.items():
    if name:
      massd[name] = sum(masses) / len(masses)

  # load delta v data
  print('Loading delta-v data...')
  deltav_map = {}
  with open(DV_PATH, 'r') as dv_file:
    for dv_row in csv.DictReader(dv_file):
      deltav_map[dv_row['pdes']] = dv_row['dv']

  print('Loading small body data...this may take a while')
  print(DATA_PATH)
  # Captures the text inside the last pair of parentheses in full_name.
  designation_regex = re.compile(r'.*\(([^\)]*)\)')
  n = 0
  with open(DATA_PATH) as data_file:
    reader = csv.DictReader(data_file, delimiter=',', quotechar='"')
    for row in reader:
      row['spec'] = row['spec_B']
      if row['spec'] == '':
        newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
        if newspec:
          row['spec'] = newspec.strip()
        elif row['pdes'] == '2012 DA14':
          print('Adjust 2012 DA14')
          row['spec'] = 'L'
        elif row['class'] in COMET_CLASSES:
          row['spec'] = 'comet'
        else:
          if partial:
            continue  # don't build the full db of 600k objects
          row['spec'] = '?'

      if row['spec'] == 'C type':
        row['spec'] = 'C'

      # match it with its delta-v
      # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
      m = designation_regex.match(row['full_name'])
      if 'pdes' in row and 'prov_des' not in row:
        row['prov_des'] = row['pdes']  # backwards compatibility for NASA change
      if m:
        # Set delta-v first, while prov_des still holds the lookup key
        dv = deltav_map.get(row['prov_des'], None)
        if dv:
          row['dv'] = dv
        row['prov_des'] = m.group(1)
      else:
        row['prov_des'] = ''

      # Clean up inputs: numeric-looking fields become floats, other strings
      # are trimmed.
      for key, val in list(row.items()):
        try:
          fv = float(val)
        # The tuple is required: `except ValueError, TypeError` only caught
        # ValueError and bound it to the name TypeError.
        except (ValueError, TypeError):
          # Non-string values (e.g. None for a short CSV row) are left as-is.
          if hasattr(val, 'strip'):
            row[key] = val.strip()
        else:
          row[key] = fv
      row['spec_T'] = row['spec_T'].replace(':', '')
      row['spec_B'] = row['spec_B'].replace(':', '')
      row['spec'] = row['spec'].replace(':', '')

      # match mass
      if row['full_name'] in massd:
        row['GM'] = massd[row['full_name']] * G

      # compute score
      row['price'], row['saved'] = scoring.price(row)
      # assume the cost of mining a distant asteroid per kg won't be much better
      # than cost to launch from earth
      # ie., 99.999% of revenue is spent on operations
      row['saved'] = row['saved'] * 0.00001
      row['closeness'] = scoring.closeness_weight(row)
      row['profit'] = scoring.profit(row)

      # TODO move this final scoring pass into scoring.py
      score = min(row['price'], 1e14) / 5e13
      if score >= 0.0001:
        # Below the threshold it's worthless, so closeness doesn't matter
        score = score * row['closeness']
      row['score'] = score

      coll.update({'full_name': row['full_name']}, {'$set': row}, True)  # upsert
      n += 1
      if n % 3000 == 0:
        print('%d ...' % n)
Esempio n. 6
0
def _run(partial=False):
    """Load the small-body data files into the ``asterank.asteroids`` collection.

    Connects to MongoDB on localhost:27017, makes sure the lookup indexes
    exist, and then:

    1. reads MASS_PATH (space-separated; the second-to-last field is the
       mass, everything before it is the object name) and averages repeated
       names,
    2. reads DV_PATH as a CSV with ``pdes`` and ``dv`` columns,
    3. walks the CSV at DATA_PATH, derives a ``spec`` field, attaches delta-v
       and mass, computes price / saved / closeness / profit through the
       ``scoring`` module, derives ``score`` (price term scaled by the cube
       of closeness) and upserts the row keyed on ``full_name``.

    Args:
        partial: when true, rows with an empty ``spec_B`` and no applicable
            fallback are skipped; otherwise they are stored with spec '?'.

    Returns nothing; progress is printed every 3000 upserted rows.
    """
    # Constants and settings
    # TODO this method should be factored out

    # Fill database
    conn = Connection('localhost', 27017)
    db = conn.asterank
    coll = db.asteroids
    #coll.drop()
    coll.ensure_index('full_name', unique=True, background=True)
    coll.ensure_index('score', background=True)
    coll.ensure_index('profit', background=True)
    coll.ensure_index('prov_des',
                      background=True)  # necessary for upcoming pass lookups
    coll.ensure_index('closeness', background=True)
    coll.ensure_index('price', background=True)

    # load mass data
    print('Loading mass data...')
    mass_samples = {}
    with open(MASS_PATH, 'r') as mass_file:
        for line in mass_file:
            parts = line.split(' ')
            massidx = len(parts) - 2
            mass = float(parts[massidx])
            name = ' '.join(parts[:massidx]).strip()
            mass_samples.setdefault(name, []).append(mass)

    # Average repeated values per object.  Lines that yield an empty name are
    # dropped; skipping them here (rather than `del massd['']`) also avoids a
    # KeyError when the file happens to contain no such line.
    massd = {}
    for name, masses in mass_samples.items():
        if name:
            massd[name] = sum(masses) / len(masses)

    # load delta v data
    print('Loading delta-v data...')
    deltav_map = {}
    with open(DV_PATH, 'r') as dv_file:
        for dv_row in csv.DictReader(dv_file):
            deltav_map[dv_row['pdes']] = dv_row['dv']

    print('Loading small body data...this may take a while')
    print(DATA_PATH)
    # Captures the text inside the last pair of parentheses in full_name.
    designation_regex = re.compile(r'.*\(([^\)]*)\)')
    n = 0
    with open(DATA_PATH) as data_file:
        reader = csv.DictReader(data_file, delimiter=',', quotechar='"')
        for row in reader:
            row['spec'] = row['spec_B']
            if row['spec'] == '':
                newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
                if newspec:
                    row['spec'] = newspec.strip()
                elif row['pdes'] == '2012 DA14':
                    print('Adjust 2012 DA14')
                    row['spec'] = 'L'
                elif row['class'] in COMET_CLASSES:
                    row['spec'] = 'comet'
                else:
                    if partial:
                        continue  # don't build the full db of 600k objects
                    row['spec'] = '?'

            if row['spec'] == 'C type':
                row['spec'] = 'C'

            # match it with its delta-v
            # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
            m = designation_regex.match(row['full_name'])
            if 'pdes' in row and 'prov_des' not in row:
                # backwards compatibility for NASA change
                row['prov_des'] = row['pdes']
            if m:
                # Set delta-v first, while prov_des still holds the lookup key
                dv = deltav_map.get(row['prov_des'], None)
                if dv:
                    row['dv'] = dv
                row['prov_des'] = m.group(1)
            else:
                row['prov_des'] = ''

            # Clean up inputs: numeric-looking fields become floats, other
            # strings are trimmed.
            for key, val in list(row.items()):
                try:
                    fv = float(val)
                # The tuple is required: `except ValueError, TypeError` only
                # caught ValueError and bound it to the name TypeError.
                except (ValueError, TypeError):
                    # Non-string values (e.g. None for a short CSV row) are
                    # left as-is.
                    if hasattr(val, 'strip'):
                        row[key] = val.strip()
                else:
                    row[key] = fv
            row['spec_T'] = row['spec_T'].replace(':', '')
            row['spec_B'] = row['spec_B'].replace(':', '')
            row['spec'] = row['spec'].replace(':', '')

            # match mass
            if row['full_name'] in massd:
                row['GM'] = massd[row['full_name']] * G

            # compute score
            row['price'], row['saved'] = scoring.price(row)
            # assume the cost of mining a distant asteroid per kg won't be much better
            # than cost to launch from earth
            # ie., 99.999% of revenue is spent on operations
            row['saved'] = row['saved'] * 0.00001
            row['closeness'] = scoring.closeness_weight(row)
            row['profit'] = scoring.profit(row)

            # TODO this belongs in scoring
            score = min(row['price'], 1e14) / 5e12
            if score >= 0.0001:
                # Below the threshold it's worthless, so closeness doesn't matter
                score = score * row['closeness']**3
            row['score'] = score

            coll.update({'full_name': row['full_name']}, {'$set': row},
                        True)  # upsert
            n += 1
            if n % 3000 == 0:
                print('%d ...' % n)