Ejemplo n.º 1
0
from AgenciesScraperUtils import generate_nasa_url, retrieve, parse_nasa_page, save_json
import sys, os

lib_path = os.path.abspath("../../")
sys.path.append(lib_path)
from PL_fields import FIELDS

if __name__ == "__main__":

    # Each entry is (mission_name, [keywords]); a mission's keyword list grows
    # as the same mission shows up under several fields.
    missions = []
    data = []

    # Pass 1: collect every mission together with all of its keywords.
    for field in FIELDS:
        keyword = FIELDS[field]["kw"]
        for mission in FIELDS[field]["missions"]:
            saved_names = [saved[0] for saved in missions]
            if mission in saved_names:
                # Mission already recorded: just add this field's keyword.
                missions[saved_names.index(mission)][1].append(keyword)
            else:
                missions.append((mission, [keyword]))

    # Pass 2: scrape once per (mission, keyword) pair, AFTER the mission list
    # is complete. NOTE(review): originally this loop sat inside the field
    # loop above, so every field iteration re-scraped all missions collected
    # so far, re-fetching URLs and appending duplicate rows to `data`.
    for mission_name, keywords in missions:
        for keyword in keywords:
            url = generate_nasa_url(mission_name + " " + keyword)
            html = retrieve(url)
            data.append([mission_name, keyword, parse_nasa_page(html)])

    save_json("NasaMissions.txt", ["mission", "keyword", "urls"], data)
Ejemplo n.º 2
0
from subprocess import call
from urllib import quote_plus, urlencode
# load dict from "scidata_only_payloads.json"

from PL_fields import FIELDS

# cross mission+payload

# Build the URL-quoted "mission keyword" query string for every mission of
# every field, in field order.
parameters = [
    quote_plus(m + ' ' + FIELDS[f]['kw'])
    for f in FIELDS
    for m in FIELDS[f]['missions']
]

# run quickscrape on websites

options = {'NASA':
           {
              'url': 'http://nasasearch.nasa.gov/search?',
              'parameters': 'affiliate=nasa&query=%s&commit=Search',
              'scraper': 'scrapers/nasasearch.json',
              'output': 'outputs'
            },
           'ESA':
           {
              'url': 'http://www.esa.int/esasearch?',
              'parameters': 'q=%s',
              'scraper': 'scrapers/esasearch.json',
Ejemplo n.º 3
0
from AgenciesScraperUtils import generate_nasa_url, retrieve, parse_nasa_page, save_json
import sys, os

lib_path = os.path.abspath("../../")
sys.path.append(lib_path)
from PL_fields import FIELDS

if __name__ == "__main__":

    # Each entry is (mission_name, [keywords]); a mission's keyword list grows
    # as the same mission shows up under several fields.
    missions = []
    data = []

    # Pass 1: collect every mission together with all of its keywords.
    for field in FIELDS:
        keyword = FIELDS[field]["kw"]
        for mission in FIELDS[field]["missions"]:
            saved_names = [saved[0] for saved in missions]
            if mission in saved_names:
                # Mission already recorded: just add this field's keyword.
                missions[saved_names.index(mission)][1].append(keyword)
            else:
                missions.append((mission, [keyword]))

    # Pass 2: scrape once per (mission, keyword) pair. Fixes two defects:
    # the original indented this loop with TABS inside a space-indented block
    # (a TabError under Python 3 — the file could not even be parsed), and
    # Python 2's tab expansion would have placed it inside the field loop,
    # re-scraping every mission on each field iteration.
    for mission_name, keywords in missions:
        for keyword in keywords:
            url = generate_nasa_url(mission_name + " " + keyword)
            html = retrieve(url)
            data.append([mission_name, keyword, parse_nasa_page(html)])

    save_json("NasaMissions.txt", ["mission", "keyword", "urls"], data)