def main(argv=None):
    """Collect course data, sanity-check it, and write it out as JSON.

    Course data is either reloaded from an existing JSON file
    (-i/--input_json_data) or fetched through ``testudo.crawler`` for one
    department (-d/--dept) or for every entry of ``dept_list``.  Both the
    'UGRAD' and 'GRAD' levels are always fetched.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.  Recognised options: -h/--help, -q,
            -o/--output PATH, -i/--input_json_data PATH, -d/--dept DEPT,
            -t/--term TERM, -l/--level LEVEL.

    Returns:
        0 when ``count_fields`` reports a field with a count below 2 (a
        warning is printed and nothing is written); otherwise the success
        message string.

    Raises:
        Usage: if the command line cannot be parsed or help is requested.
            No handler exists in this block, so it propagates to the caller.
    """
    if argv is None:
        argv = sys.argv

    try:
        # Every value-taking short option needs a trailing ':'; without it
        # getopt hands back '' as the value and leaves the real value in args.
        opts, args = getopt.getopt(
            argv[1:],
            "ho:i:d:t:l:q",
            ["help", "output=", "input_json_data=", "dept=", "term=", "level="],
        )
    except getopt.error as msg:
        raise Usage(msg)

    verbose = True
    output = os.path.dirname(os.path.realpath(__file__)) + '/../data/course_data.json'
    json_data = None
    dept = None
    term = None

    # option processing
    for option, value in opts:
        if option == "-q":
            verbose = False
        elif option in ("-h", "--help"):
            raise Usage(help_message)
        elif option in ("-i", "--input_json_data"):
            json_data = value
        elif option in ("-o", "--output"):
            output = value
        elif option in ("-d", "--dept"):
            dept = value
        elif option in ("-t", "--term"):
            term = value
        # -l/--level is accepted but has no effect: both levels are always
        # fetched below.

    # The crawler is constructed even when data is reloaded from JSON,
    # matching the original statement order.
    crawler = testudo.crawler(term=term, verbose=verbose)

    if json_data:
        # Load existing JSON data (faster)
        with open(json_data, 'rb') as source:
            courses = json.load(source)
    else:
        # Fetch course data from server
        departments = [dept] if dept else dept_list
        courses = []
        for department in departments:
            for level_name in ('UGRAD', 'GRAD'):
                courses.extend(
                    crawler.get_courses(dept=department, level=level_name))

    # Error checking, with counting the number of each field
    for attribute, value in count_fields(courses).items():
        if value < 2:
            print("Field " + attribute + " has a small number of fields, "
                  + str(value)
                  + ". Report this problem directly to Administrators.")
            return 0

    # 'wb' is kept from the original (Python 2 file semantics).
    with open(output, 'wb') as sink:
        json.dump(courses, sink, indent=2)
    print("1")
    return "[Python] Crawling Successful.\n"
def _write_csv_tables(courses):
    """Flatten *courses* into data/courses.csv, sections.csv and class_times.csv.

    Mutates *courses*: the 'class_times' key is deleted from every section
    and the 'sections' key from every course before their remaining values
    are written as a row.
    """
    def make_writer(handle):
        return csv.writer(handle, delimiter=' ', quotechar='|',
                          quoting=csv.QUOTE_MINIMAL)

    # Nested 'with' blocks guarantee all three files are flushed and closed,
    # even if a row fails to serialise.  'wb' is the mode the Python 2 csv
    # module expects.
    with open('data/courses.csv', 'wb') as course_file:
        with open('data/sections.csv', 'wb') as section_file:
            with open('data/class_times.csv', 'wb') as class_time_file:
                course_writer = make_writer(course_file)
                section_writer = make_writer(section_file)
                class_time_writer = make_writer(class_time_file)

                for course in courses:
                    if course['sections']:
                        for section in course['sections']:
                            if section['class_times']:
                                for class_time in section['class_times']:
                                    class_time_writer.writerow(
                                        [course['code'], section['section']]
                                        + list(class_time.values()))
                            # Drop the nested list so it is not emitted as a
                            # column of the section row.
                            del section['class_times']
                            section_writer.writerow(
                                [course['code']] + list(section.values()))
                    # Same for the course row: nested sections are removed.
                    del course['sections']
                    course_writer.writerow(list(course.values()))


def main(argv=None):
    """Collect course data for term '201101', dump it as JSON, then export CSV.

    Course data is either reloaded from an existing JSON file
    (-i/--input_json_data) or fetched through ``testudo.crawler`` for one
    department (-d/--dept) or, without it, via ``get_all_courses()``.

    Args:
        argv: Full argument vector including the program name; defaults to
            ``sys.argv``.  Recognised options: -h/--help, -q,
            -o/--output PATH, -i/--input_json_data PATH, -d/--dept DEPT.

    Raises:
        Usage: if the command line cannot be parsed or help is requested.
            No handler exists in this block, so it propagates to the caller.
    """
    if argv is None:
        argv = sys.argv

    try:
        # Every value-taking short option needs a trailing ':'; without it
        # getopt hands back '' as the value and leaves the real value in args.
        opts, args = getopt.getopt(
            argv[1:],
            "ho:i:d:q",
            ["help", "output=", "input_json_data=", "dept="],
        )
    except getopt.error as msg:
        raise Usage(msg)

    verbose = True
    output = 'data/course_data.json'
    json_data = None
    dept = None

    # option processing
    for option, value in opts:
        if option == "-q":
            verbose = False
        elif option in ("-h", "--help"):
            raise Usage(help_message)
        elif option in ("-i", "--input_json_data"):
            json_data = value
        elif option in ("-o", "--output"):
            output = value
        elif option in ("-d", "--dept"):
            dept = value

    # The crawler is constructed even when data is reloaded from JSON,
    # matching the original statement order.
    crawler = testudo.crawler(term='201101', verbose=verbose)

    if json_data:
        # Load existing JSON data (faster)
        with open(json_data, 'rb') as source:
            courses = json.load(source)
    elif dept:
        # Fetch course data from server
        courses = crawler.get_courses(dept=dept)
    else:
        courses = crawler.get_all_courses()

    # The JSON dump must happen before the CSV export, which strips the
    # nested 'sections' / 'class_times' entries from the records.
    with open(output, 'wb') as sink:
        json.dump(courses, sink, indent=2)

    if csv:
        _write_csv_tables(courses)
def setUp(self):
    """Build a verbose ``testudo`` crawler for term '201101' and keep it on ``self.crawler``."""
    crawler_settings = {'term': '201101', 'verbose': True}
    self.crawler = testudo.crawler(**crawler_settings)