def populateDb():
  conn = Connection('localhost', 27017)
  db = conn.asterank
  coll = db.asteroids
  #coll.drop()
  coll.ensure_index('full_name', unique=True, background=True)
  coll.ensure_index('score', background=True)
  coll.ensure_index('profit', background=True)
  coll.ensure_index('prov_des', background=True)  # necessary for upcoming pass lookups
  coll.ensure_index('closeness', background=True)
  coll.ensure_index('price', background=True)

  # load mass data
  print 'Loading mass data...'
  f = open(MASS_PATH, 'r')
  lines = f.readlines()
  f.close()
  massd = {}
  for line in lines:
    parts = line.split(' ')
    massidx = len(parts) - 2
    mass = float(parts[massidx])
    name = ' '.join(parts[:massidx]).strip()
    if name not in massd:
      massd[name] = []
    massd[name].append(mass)
  # average multiple mass estimates for the same object
  for name, masses in massd.iteritems():
    avg = sum(masses) / len(masses)
    massd[name] = avg
  del massd['']

  # load delta v data
  f = open(DV_PATH, 'r')
  lines = f.readlines()
  f.close()
  print 'Loading delta-v data...'
  deltav_map = {}
  for line in lines:
    parts = line.split(',')
    des = parts[0]
    dv = float(parts[1])
    deltav_map[des] = dv

  print 'Loading small body data...this may take a while'
  print DATA_PATH
  reader = csv.DictReader(open(DATA_PATH), delimiter=',', quotechar='"')
  designation_regex = re.compile(r'.*\(([^\)]*)\)')
  n = 0
  for row in reader:
    if row['spec_B'] == '':
      newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
      if newspec:
        # TODO should have our own merged spec row, instead we overwrite spec_B
        row['spec_B'] = newspec.strip()
      elif row['pdes'] == '2012 DA14':
        print 'Adjust 2012 DA14'
        row['spec_B'] = 'L'
      elif row['class'] in COMET_CLASSES:
        row['spec_B'] = 'comet'
      else:
        continue
        # TODO temp
        #row['spec_B'] = 'S'
      """
      elif row['pdes'] == '2008 HU4':
        print 'Adjust 2008 HU4'
        row['spec_B'] = 'C'
        row['GM'] = 3.0268194e-14  # 500 tons
        row['diameter'] = 8  # 8 meters
      """
    if row['spec_B'] == 'C type':
      row['spec_B'] = 'C'

    # match it with its delta-v
    m = designation_regex.match(row['full_name'])
    if 'pdes' in row and 'prov_des' not in row:
      row['prov_des'] = row['pdes']  # backwards compatibility for NASA change
    if m:
      row['prov_des'] = m.groups()[0]
      dv = deltav_map.get(row['prov_des'], None)
      if dv:
        row['dv'] = dv
    else:
      row['prov_des'] = ''

    # Clean up inputs
    for key, val in row.items():
      try:
        fv = float(val)
      except (ValueError, TypeError):
        # non-numeric values are kept as stripped strings
        row[key] = val.strip()
      else:
        row[key] = fv
    row['spec_T'] = row['spec_T'].replace(':', '')
    row['spec_B'] = row['spec_B'].replace(':', '')

    # match mass
    if row['full_name'] in massd:
      row['GM'] = massd[row['full_name']] * G

    # compute score
    row['price'], row['saved'] = scoring.price(row)
    # assume the cost of mining a distant asteroid per kg won't be much better
    # than cost to launch from earth
    # ie., 99.999% of revenue is spent on operations
    row['saved'] = row['saved'] * 0.00001
    row['closeness'] = scoring.closeness_weight(row)
    row['profit'] = scoring.profit(row)

    # TODO move this into scoring once I get it right
    score = min(row['price'], 1e14) / 5e12
    if score < 0.0001:
      # It's worthless, so closeness doesn't matter
      row['score'] = score
    else:
      score = score * row['closeness']**3
      row['score'] = score

    coll.update({'full_name': row['full_name']}, {'$set': row}, True)  # upsert
    n += 1
    if n % 3000 == 0:
      print n, '...'
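# The mass loader above assumes each MASS_PATH line ends with two numeric tokens:
# the second-to-last token is the mass estimate, everything before it is the object
# name, and repeated names are averaged. A minimal sketch of that parsing on
# hypothetical input lines (the values below are illustrative, not from the real file):
sample_lines = ['1 Ceres 9.43e20 1.0e19\n', '1 Ceres 9.39e20 2.0e19\n']
sample_massd = {}
for line in sample_lines:
  parts = line.split(' ')
  massidx = len(parts) - 2  # second-to-last token is the mass
  name = ' '.join(parts[:massidx]).strip()
  sample_massd.setdefault(name, []).append(float(parts[massidx]))
averaged = dict((k, sum(v) / len(v)) for k, v in sample_massd.iteritems())
print averaged  # {'1 Ceres': 9.41e+20}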
def _run(partial=False):
  # Constants and settings
  # Fill database
  conn = Connection('localhost', 27017)
  db = conn.asterank
  coll = db.asteroids
  print 'Dropping asteroids (SBDB) collection...'
  coll.drop()
  coll.ensure_index('full_name', unique=True, background=True)
  coll.ensure_index('score', background=True)
  coll.ensure_index('profit', background=True)
  coll.ensure_index('prov_des', background=True)  # necessary for upcoming pass lookups
  coll.ensure_index('closeness', background=True)
  coll.ensure_index('price', background=True)

  # load mass data
  print 'Loading mass data...'
  f = open(MASS_PATH, 'r')
  lines = f.readlines()
  f.close()
  massd = {}
  for line in lines:
    parts = line.split(' ')
    massidx = len(parts) - 2
    mass = float(parts[massidx])
    name = ' '.join(parts[:massidx]).strip()
    if name not in massd:
      massd[name] = []
    massd[name].append(mass)
  # average multiple mass estimates for the same object
  for name, masses in massd.iteritems():
    avg = sum(masses) / len(masses)
    massd[name] = avg
  del massd['']

  # load delta v data
  print 'Loading delta-v data...'
  reader = csv.DictReader(open(DV_PATH, 'r'))
  deltav_map = {}
  for row in reader:
    deltav_map[row['pdes']] = row['dv']

  print 'Loading small body data...this may take a while'
  print DATA_PATH
  reader = csv.DictReader(open(DATA_PATH), delimiter=',', quotechar='"')
  designation_regex = re.compile(r'.*\(([^\)]*)\)')
  n = 0
  items = []
  for row in reader:
    row['spec'] = row['spec_B']
    row['full_name'] = row['full_name'].strip()
    if row['spec'] == '':
      newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
      if newspec:
        row['spec'] = newspec.strip()
      # TODO(@ian) move specific adjustments out into its own file.
      elif row['pdes'] == '2012 DA14':
        print 'Adjust 2012 DA14'
        row['spec'] = 'L'
      elif row['full_name'] == '6178 (1986 DA)':
        print 'Adjust 1986 DA'
        row['spec'] = 'M'
      elif row['full_name'] == '436724 (2011 UW158)':
        print 'Adjust 2011 UW158'
        row['spec'] = 'Xc'
      elif row['class'] in COMET_CLASSES:
        row['spec'] = 'comet'
      else:
        if partial:
          # don't build the full db of 600k objects
          continue
        row['spec'] = '?'
    if row['spec'] == 'C type':
      row['spec'] = 'C'

    # match it with its delta-v
    # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
    m = designation_regex.match(row['full_name'])
    if 'pdes' in row and 'prov_des' not in row:
      row['prov_des'] = row['pdes']  # backwards compatibility for NASA change
    if m:
      # Set delta-v first
      dv = deltav_map.get(row['prov_des'], None)
      if dv:
        row['dv'] = dv
      row['prov_des'] = m.groups()[0]
    else:
      row['prov_des'] = ''

    # Clean up inputs
    for key, val in row.items():
      try:
        fv = float(val)
      except (ValueError, TypeError):
        # non-numeric values are kept as stripped strings
        row[key] = val.strip()
      else:
        row[key] = fv
    row['spec_T'] = row['spec_T'].replace(':', '')
    row['spec_B'] = row['spec_B'].replace(':', '')
    row['spec'] = row['spec'].replace(':', '')

    # match mass
    if row['full_name'] in massd:
      row['GM'] = massd[row['full_name']] * G

    # compute score
    row['price'], row['saved'] = scoring.price(row)
    # assume the cost of mining a distant asteroid per kg won't be much better
    # than cost to launch from earth
    # ie., 99.999% of revenue is spent on operations
    row['saved'] = row['saved'] * 0.00001
    row['closeness'] = scoring.closeness_weight(row)
    row['profit'] = scoring.profit(row)

    # TODO move this final scoring pass into scoring.py
    # cap price influence on score at 10 B
    score = min(row['price'], 1e10) / 5e11
    if score > 0.0001:
      score = score + row['closeness'] / 20
    row['score'] = score

    items.append(row)
    n += 1
    if len(items) > 20000:
      # insert into mongo
      print 'Row #', n, '... inserting/updating %d items into asteroids (SBDB) collection' % (len(items))
      coll.insert(items, continue_on_error=True)
      items = []
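  # The batched insert above only fires when the buffer passes 20,000 rows, so whatever
  # is left in `items` once the reader is exhausted still needs a final write. That tail
  # flush is not shown in the excerpt; a minimal sketch, assuming the same `coll` and
  # `items` names at the end of the same function:
  if len(items) > 0:
    print 'Inserting %d remaining items into asteroids (SBDB) collection' % (len(items))
    coll.insert(items, continue_on_error=True)
    items = []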
def _run(partial=False):
  # Constants and settings
  # Fill database
  conn = Connection('localhost', 27017)
  db = conn.asterank
  coll = db.asteroids
  print 'Dropping asteroids (SBDB) collection...'
  coll.drop()
  coll.ensure_index('full_name', unique=True, background=True)
  coll.ensure_index('score', background=True)
  coll.ensure_index('profit', background=True)
  coll.ensure_index('prov_des', background=True)  # necessary for upcoming pass lookups
  coll.ensure_index('closeness', background=True)
  coll.ensure_index('price', background=True)

  # load mass data
  print 'Loading mass data...'
  f = open(MASS_PATH, 'r')
  lines = f.readlines()
  f.close()
  massd = {}
  for line in lines:
    parts = line.split(' ')
    massidx = len(parts) - 2
    mass = float(parts[massidx])
    name = ' '.join(parts[:massidx]).strip()
    if name not in massd:
      massd[name] = []
    massd[name].append(mass)
  # average multiple mass estimates for the same object
  for name, masses in massd.iteritems():
    avg = sum(masses) / len(masses)
    massd[name] = avg
  del massd['']

  # load delta v data
  print 'Loading delta-v data...'
  reader = csv.DictReader(open(DV_PATH, 'r'))
  deltav_map = {}
  for row in reader:
    deltav_map[row['pdes']] = row['dv']

  print 'Loading small body data...this may take a while'
  print DATA_PATH
  reader = csv.DictReader(open(DATA_PATH), delimiter=',', quotechar='"')
  designation_regex = re.compile(r'.*\(([^\)]*)\)')
  n = 0
  items = []
  for row in reader:
    row['spec'] = row['spec_B']
    row['full_name'] = row['full_name'].strip()
    if row['spec'] == '':
      newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
      if newspec:
        row['spec'] = newspec.strip()
      # TODO(@ian) move specific adjustments out into its own file.
      elif row['pdes'] == '2012 DA14':
        print 'Adjust 2012 DA14'
        row['spec'] = 'L'
      elif row['full_name'] == '6178 (1986 DA)':
        print 'Adjust 1986 DA'
        row['spec'] = 'M'
      elif row['full_name'] == '436724 (2011 UW158)':
        print 'Adjust 2011 UW158'
        row['spec'] = 'Xc'
      elif row['class'] in COMET_CLASSES:
        row['spec'] = 'comet'
      else:
        if partial:
          # don't build the full db of 600k objects
          continue
        row['spec'] = '?'
    if row['spec'] == 'C type':
      row['spec'] = 'C'

    # match it with its delta-v
    # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
    m = designation_regex.match(row['full_name'])
    if 'pdes' in row and 'prov_des' not in row:
      row['prov_des'] = row['pdes']  # backwards compatibility for NASA change
    if m:
      # Set delta-v first
      dv = deltav_map.get(row['prov_des'], None)
      if dv:
        row['dv'] = dv
      row['prov_des'] = m.groups()[0]
    else:
      row['prov_des'] = ''

    # Clean up inputs
    for key, val in row.items():
      try:
        fv = float(val)
      except (ValueError, TypeError):
        # non-numeric values are kept as stripped strings
        row[key] = val.strip()
      else:
        row[key] = fv
    # row['spec_T'] = row['spec_T'].replace(':', '')
    # row['spec_B'] = row['spec_B'].replace(':', '')
    # row['spec'] = row['spec'].replace(':', '')

    # match mass
    if row['full_name'] in massd:
      row['GM'] = massd[row['full_name']] * G

    # compute score
    row['price'], row['saved'] = scoring.price(row)
    # assume the cost of mining a distant asteroid per kg won't be much better
    # than cost to launch from earth
    # ie., 99.999% of revenue is spent on operations
    row['saved'] = row['saved'] * 0.00001
    row['closeness'] = scoring.closeness_weight(row)
    row['profit'] = scoring.profit(row)

    # TODO move this final scoring pass into scoring.py
    # cap price influence on score at 10 B
    score = min(row['price'], 1e10) / 5e11
    if score > 0.0001:
      score = score + row['closeness'] / 20
    row['score'] = score

    items.append(row)
    n += 1
    if len(items) > 20000:
      # insert into mongo
      print 'Row #', n, '... inserting/updating %d items into asteroids (SBDB) collection' % (len(items))
      coll.insert(items, continue_on_error=True)
      items = []
def _run(partial=False):
  # Constants and settings
  # TODO this method should be factored out
  # Fill database
  conn = Connection('localhost', 27017)
  db = conn.asterank
  coll = db.asteroids
  #coll.drop()
  coll.ensure_index('full_name', unique=True, background=True)
  coll.ensure_index('score', background=True)
  coll.ensure_index('profit', background=True)
  coll.ensure_index('prov_des', background=True)  # necessary for upcoming pass lookups
  coll.ensure_index('closeness', background=True)
  coll.ensure_index('price', background=True)

  # load mass data
  print 'Loading mass data...'
  f = open(MASS_PATH, 'r')
  lines = f.readlines()
  f.close()
  massd = {}
  for line in lines:
    parts = line.split(' ')
    massidx = len(parts) - 2
    mass = float(parts[massidx])
    name = ' '.join(parts[:massidx]).strip()
    if name not in massd:
      massd[name] = []
    massd[name].append(mass)
  # average multiple mass estimates for the same object
  for name, masses in massd.iteritems():
    avg = sum(masses) / len(masses)
    massd[name] = avg
  del massd['']

  # load delta v data
  print 'Loading delta-v data...'
  reader = csv.DictReader(open(DV_PATH, 'r'))
  deltav_map = {}
  for row in reader:
    deltav_map[row['pdes']] = row['dv']

  print 'Loading small body data...this may take a while'
  print DATA_PATH
  reader = csv.DictReader(open(DATA_PATH), delimiter=',', quotechar='"')
  designation_regex = re.compile(r'.*\(([^\)]*)\)')
  n = 0
  for row in reader:
    row['spec'] = row['spec_B']
    if row['spec'] == '':
      newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
      if newspec:
        row['spec'] = newspec.strip()
      elif row['pdes'] == '2012 DA14':
        print 'Adjust 2012 DA14'
        row['spec'] = 'L'
      elif row['class'] in COMET_CLASSES:
        row['spec'] = 'comet'
      else:
        if partial:
          continue  # don't build the full db of 600k objects
        row['spec'] = '?'
    if row['spec'] == 'C type':
      row['spec'] = 'C'

    # match it with its delta-v
    # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
    m = designation_regex.match(row['full_name'])
    if 'pdes' in row and 'prov_des' not in row:
      row['prov_des'] = row['pdes']  # backwards compatibility for NASA change
    if m:
      # Set delta-v first
      dv = deltav_map.get(row['prov_des'], None)
      if dv:
        row['dv'] = dv
      row['prov_des'] = m.groups()[0]
    else:
      row['prov_des'] = ''

    # Clean up inputs
    for key, val in row.items():
      try:
        fv = float(val)
      except (ValueError, TypeError):
        # non-numeric values are kept as stripped strings
        row[key] = val.strip()
      else:
        row[key] = fv
    row['spec_T'] = row['spec_T'].replace(':', '')
    row['spec_B'] = row['spec_B'].replace(':', '')
    row['spec'] = row['spec'].replace(':', '')

    # match mass
    if row['full_name'] in massd:
      row['GM'] = massd[row['full_name']] * G

    # compute score
    row['price'], row['saved'] = scoring.price(row)
    # assume the cost of mining a distant asteroid per kg won't be much better
    # than cost to launch from earth
    # ie., 99.999% of revenue is spent on operations
    row['saved'] = row['saved'] * 0.00001
    row['closeness'] = scoring.closeness_weight(row)
    row['profit'] = scoring.profit(row)

    # TODO move this final scoring pass into scoring.py
    score = min(row['price'], 1e14) / 5e13
    if score < 0.0001:
      # It's worthless, so closeness doesn't matter
      row['score'] = score
    else:
      score = score * row['closeness']
      row['score'] = score

    coll.update({'full_name': row['full_name']}, {'$set': row}, True)  # upsert
    n += 1
    if n % 3000 == 0:
      print n, '...'
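# Worked example of the scoring pass above (illustrative values, not real rows):
#   price $4e13, closeness 1.2 -> min(4e13, 1e14) / 5e13 * 1.2 = 0.96
#   price $1e9                 -> 1e9 / 5e13 = 2e-5, below the 0.0001 cutoff,
#                                 so closeness is ignored and the raw value is kept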
def _run(partial=False):
  # Constants and settings
  # TODO this method should be factored out
  # Fill database
  conn = Connection('localhost', 27017)
  db = conn.asterank
  coll = db.asteroids
  #coll.drop()
  coll.ensure_index('full_name', unique=True, background=True)
  coll.ensure_index('score', background=True)
  coll.ensure_index('profit', background=True)
  coll.ensure_index('prov_des', background=True)  # necessary for upcoming pass lookups
  coll.ensure_index('closeness', background=True)
  coll.ensure_index('price', background=True)

  # load mass data
  print 'Loading mass data...'
  f = open(MASS_PATH, 'r')
  lines = f.readlines()
  f.close()
  massd = {}
  for line in lines:
    parts = line.split(' ')
    massidx = len(parts) - 2
    mass = float(parts[massidx])
    name = ' '.join(parts[:massidx]).strip()
    if name not in massd:
      massd[name] = []
    massd[name].append(mass)
  # average multiple mass estimates for the same object
  for name, masses in massd.iteritems():
    avg = sum(masses) / len(masses)
    massd[name] = avg
  del massd['']

  # load delta v data
  print 'Loading delta-v data...'
  reader = csv.DictReader(open(DV_PATH, 'r'))
  deltav_map = {}
  for row in reader:
    deltav_map[row['pdes']] = row['dv']

  print 'Loading small body data...this may take a while'
  print DATA_PATH
  reader = csv.DictReader(open(DATA_PATH), delimiter=',', quotechar='"')
  designation_regex = re.compile(r'.*\(([^\)]*)\)')
  n = 0
  for row in reader:
    row['spec'] = row['spec_B']
    if row['spec'] == '':
      newspec = THOLEN_MAPPINGS.get(row['spec_T'], None)
      if newspec:
        row['spec'] = newspec.strip()
      elif row['pdes'] == '2012 DA14':
        print 'Adjust 2012 DA14'
        row['spec'] = 'L'
      elif row['class'] in COMET_CLASSES:
        row['spec'] = 'comet'
      else:
        if partial:
          continue  # don't build the full db of 600k objects
        row['spec'] = '?'
    if row['spec'] == 'C type':
      row['spec'] = 'C'

    # match it with its delta-v
    # TODO(@ian) don't overwrite prov_des, create some unified name field instead.
    m = designation_regex.match(row['full_name'])
    if 'pdes' in row and 'prov_des' not in row:
      row['prov_des'] = row['pdes']  # backwards compatibility for NASA change
    if m:
      # Set delta-v first
      dv = deltav_map.get(row['prov_des'], None)
      if dv:
        row['dv'] = dv
      row['prov_des'] = m.groups()[0]
    else:
      row['prov_des'] = ''

    # Clean up inputs
    for key, val in row.items():
      try:
        fv = float(val)
      except (ValueError, TypeError):
        # non-numeric values are kept as stripped strings
        row[key] = val.strip()
      else:
        row[key] = fv
    row['spec_T'] = row['spec_T'].replace(':', '')
    row['spec_B'] = row['spec_B'].replace(':', '')
    row['spec'] = row['spec'].replace(':', '')

    # match mass
    if row['full_name'] in massd:
      row['GM'] = massd[row['full_name']] * G

    # compute score
    row['price'], row['saved'] = scoring.price(row)
    # assume the cost of mining a distant asteroid per kg won't be much better
    # than cost to launch from earth
    # ie., 99.999% of revenue is spent on operations
    row['saved'] = row['saved'] * 0.00001
    row['closeness'] = scoring.closeness_weight(row)
    row['profit'] = scoring.profit(row)

    # TODO this belongs in scoring
    score = min(row['price'], 1e14) / 5e12
    if score < 0.0001:
      # It's worthless, so closeness doesn't matter
      row['score'] = score
    else:
      score = score * row['closeness']**3
      row['score'] = score

    coll.update({'full_name': row['full_name']}, {'$set': row}, True)  # upsert
    n += 1
    if n % 3000 == 0:
      print n, '...'
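# All of the versions above target the legacy PyMongo 2.x API: Connection, ensure_index,
# update() with a positional upsert flag, and insert(..., continue_on_error=True).
# On PyMongo 3+ those calls were renamed; a rough sketch of the equivalents, assuming the
# same localhost instance and asterank.asteroids collection (document values illustrative):
from pymongo import MongoClient, ASCENDING

client = MongoClient('localhost', 27017)
coll = client.asterank.asteroids
coll.create_index([('full_name', ASCENDING)], unique=True, background=True)
coll.create_index([('score', ASCENDING)], background=True)

row = {'full_name': '433 Eros (A898 PA)', 'score': 0.0}
coll.update_one({'full_name': row['full_name']}, {'$set': row}, upsert=True)  # replaces update(..., True)

batch = [{'full_name': '6178 (1986 DA)', 'score': 0.0}]
coll.insert_many(batch, ordered=False)  # rough analogue of insert(..., continue_on_error=True)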