def update_csv_to_municipalities(csv_to_be_parsed, csv_parsed_successfully,
                                 csv_parsed_unsuccessfully, format="kml"):
    """
    Reformat a csv that already contains osm_id numbers into the complete
    municipality format by looking each osm_id up on OpenStreetMap.

    :param csv_to_be_parsed: path of the csv to be parsed
    :param csv_parsed_successfully: path for rows whose osm_id was found
    :param csv_parsed_unsuccessfully: path for rows whose osm_id was not found
    :param format: format of the polygon column (default "kml")
    """
    # TODO add a handle for formatting different formats
    # TODO add an option for using a more complex csv
    # TODO add an option for google search
    csv_to_be_parsed = open(csv_to_be_parsed, 'rb')
    csv_parsed_successfully = open(csv_parsed_successfully, 'w+')
    csv_parsed_unsuccessfully = open(csv_parsed_unsuccessfully, 'w+')
    reader = unicode_csv.UnicodeReader(csv_to_be_parsed)
    writer_successfully = unicode_csv.UnicodeWriter(csv_parsed_successfully)
    writer_unsuccessfully = unicode_csv.UnicodeWriter(csv_parsed_unsuccessfully)
    writer_successfully.writerow([
        'unique_id', 'name', 'city', 'state', 'country', 'osm_id', 'lat',
        'lng', 'polygon'
    ])
    writer_unsuccessfully.writerow([
        'unique_id', 'name', 'city', 'state', 'country', 'osm_id', 'lat',
        'lng', 'polygon'
    ])
    for row in reader:
        unique_id = row[0]
        search_string = row[1]
        osm_id = row[2]
        time.sleep(.2)  # throttle requests to the OSM API
        entity_data = fetch.get_entity_data_from_osm_id(
            unique_id, search_string, osm_id)
        if entity_data:
            print entity_data
            writer_successfully.writerow(entity_data)
        # if for some reason this id was not found on OpenStreetMap data
        else:
            print '************* did not find osm_id: ' + osm_id + \
                ' unique id: ' + unique_id + ' ******************'
            writer_unsuccessfully.writerow([unique_id, osm_id])
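# A minimal usage sketch for update_csv_to_municipalities. The file names are
# hypothetical; the input csv is assumed to hold unique_id, search name, and
# osm_id in its first three columns, as the loop above expects.
update_csv_to_municipalities(
    'municipalities_with_osm_ids.csv',   # hypothetical input path
    'municipalities_found.csv',          # rows whose osm_id resolved
    'municipalities_not_found.csv')      # rows whose osm_id did not resolve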
def main():
    rss = []
    update_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # cleaner = Cleaner(remove_unknown_tags=True, allow_tags=None)

    ### Database Information ###
    csv_file = "search_result.csv"
    db_file = "search_result.db"

    # Database connection
    db = sqlite3.connect(db_file)
    db_cursor = db.cursor()

    with open('regex_out_top.txt') as f:
        iTunesLinks = f.readlines()  # reads through output of TopPodasts.py

    # Output file, because there are too many links to be readable on the command line
    g = open("rsstestout.txt", "w")

    # Go through the output of TopPodasts.py, convert each iTunes link to its
    # RSS feed URL once, then both write it out and keep it for parsing
    for link in iTunesLinks:
        feed_url = itunes_feed_extractor.ConvertItunesLink(link)
        g.write(str(feed_url) + "\n")  # writes rss URLs to a file
        rss.append(feed_url)  # collects rss feeds into array

    # Go through the rss array, link by link
    for link in rss:
        r = feedparser.parse(str(link))  # parse each link
        url = r['feed']['link']
        for items in r.entries:  # go through episodes of each podcast
            title = items["title"]
            text1 = items.content[0].value.encode('ascii', 'ignore')  # this works in interpreter
            text = re.sub(ur'<.*?>', "", text1)  # strip html tags; this works in interpreter too
            # text.decode()  # this works in interpreter too
            # For some reason, this code is pulling everything in the entry titled
            # <div class='itemcontent' name='decodeable'>
            enter_data(db_cursor, url, title, text, update_time)
            # Should I just create the CSV here?

    # Uncomment this line to clean existing entries from the database
    # clean_expired_entries(db_cursor, update_time)

    # Commit the change to the sqlite database
    db.commit()
    # pdb.set_trace()

    # Convert the sqlite3 database to csv
    db_cursor.execute('SELECT * FROM listings')
    with open(csv_file, "wb") as f:
        csv_writer = ucsv.UnicodeWriter(f)
        csv_writer.writerow([i[0] for i in db_cursor.description])  # Write the header
        csv_writer.writerows(db_cursor)

    # Close the database and the link log
    db.close()
    g.close()
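# A possible shape for the enter_data helper called above; this is a sketch
# only, since the listings table schema is not shown here. The column names
# (url, title, text, update_time) are an assumption inferred from the call
# site enter_data(db_cursor, url, title, text, update_time).
def enter_data(db_cursor, url, title, text, update_time):
    db_cursor.execute(
        'INSERT INTO listings (url, title, text, update_time) VALUES (?, ?, ?, ?)',
        (url, title, text, update_time))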
def main():
    # When this operation starts
    update_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    ### Search Parameters ###
    locations = ["washingtondc"]
    sublocations = ["doc", "nva", "mld"]
    search_terms = []
    listings = ["cta"]  # cta is the listing for cars and trucks
    price = [3000, 7500]  # min and max price
    year = [2004, 2010]  # min and max year
    makes = ["honda", "toyota", "hyundai"]
    title_status = 1  # clean title
    has_pic = 1  # has pic

    ### Database Information ###
    csv_file = "search_result.csv"
    db_file = "search_result.db"

    # Database connection
    db = sqlite3.connect(db_file)
    db_cursor = db.cursor()

    # Retrieve and parse rss
    rss_links = rss_link_generator(locations, sublocations, search_terms, listings,
                                   price, year, makes, title_status, has_pic)
    retrieve_and_enter_data(db_cursor, rss_links, update_time)

    # Uncomment this line to clean existing entries from the database
    # clean_expired_entries(db_cursor, update_time)

    # Commit the change to the sqlite database
    db.commit()
    # pdb.set_trace()

    # Convert the sqlite3 database to csv
    db_cursor.execute('SELECT * FROM listings')
    with open(csv_file, "wb") as f:
        csv_writer = ucsv.UnicodeWriter(f)
        csv_writer.writerow([i[0] for i in db_cursor.description])  # Write the header
        csv_writer.writerows(db_cursor)

    # Close the database
    db.close()
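# A sketch of what the commented-out clean_expired_entries call might do:
# drop rows whose update_time predates the current run. The table and column
# names (listings, update_time) are assumptions based on the code above; the
# real schema is not shown here.
def clean_expired_entries(db_cursor, update_time):
    db_cursor.execute('DELETE FROM listings WHERE update_time < ?',
                      (update_time,))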
def fix_csv_area(district_csv, fixed_csv):
    """
    Fix a csv to have an id and an OpenStreetMap search name.
    Appends ' District Israel' to each district name.
    """
    reader_file = open(district_csv, 'rb')
    writer_file = open(fixed_csv, 'w+')
    reader = unicode_csv.UnicodeReader(reader_file)
    writer = unicode_csv.UnicodeWriter(writer_file)
    header = reader.next()
    print header
    for row in reader:
        open_street_search = row[3] + ' District' + ' Israel'
        open_street_search_english = row[4] + ' District' + ' Israel'
        # special case: this id uses a different English search name
        if row[1] == '1111114':
            open_street_search_english = 'judea and samaria Area'
        print row[1], open_street_search, 'or : ', open_street_search_english
        writer.writerow(
            [row[1], open_street_search, open_street_search_english])
    reader_file.close()
    writer_file.close()
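# Example invocation of fix_csv_area; the file names are hypothetical, and the
# input csv is assumed to have the district id in column 1 and two district
# names in columns 3 and 4, as the loop above reads them.
fix_csv_area('israel_districts.csv', 'israel_districts_fixed.csv')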
def render_csv(request, addon, stats, fields):
    """Render a stats series in CSV."""
    # Start with a header from the template.
    ts = time.strftime('%c %z')
    response = jingo.render(request, 'stats/csv_header.txt',
                            {'addon': addon, 'timestamp': ts})

    # For remora compatibility, reverse the output so oldest data
    # is first.
    # XXX: The list() performance penalty here might be big enough to
    # consider changing the sort order at lower levels.
    writer = unicode_csv.UnicodeWriter(response)
    writer.writerow(fields)
    stats_list = list(stats)
    for row in reversed(stats_list):
        writer.writerow(row)

    fudge_headers(response, stats_list)
    response['Content-Type'] = 'text/plain; charset=utf-8'
    return response
def fix_csv(data_csv, fixed_csv):
    """
    Fix a csv to have an id and an OpenStreetMap search name.
    Appends ' Israel' to each search item.

    :param data_csv: original data csv
    :param fixed_csv: the fixed data csv
    """
    reader_file = open(data_csv, 'rb')
    writer_file = open(fixed_csv, 'w+')
    reader = unicode_csv.UnicodeReader(reader_file)
    writer = unicode_csv.UnicodeWriter(writer_file)
    header = reader.next()
    print header
    for row in reader:
        open_street_search = row[3] + ' Israel'
        open_street_search_english = row[4] + ' Israel'
        print row[1], open_street_search, 'or : ', open_street_search_english
        writer.writerow(
            [row[1], open_street_search, open_street_search_english])
    reader_file.close()
    writer_file.close()
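# Example invocation of fix_csv; the file names are hypothetical. The same
# column layout as fix_csv_area is assumed (id in column 1, names in columns
# 3 and 4), except that only ' Israel' is appended to the search names.
fix_csv('israel_municipalities.csv', 'israel_municipalities_fixed.csv')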
def parse_csv_to_municipalities(csv_to_be_parsed, csv_parsed_successfully,
                                csv_parsed_unsuccessfully, format="kml",
                                search_column=0):
    """
    A tool to parse a csv file into a csv which contains municipality
    boundaries. The tool reads search_column and finds the boundary of each
    municipality using OpenStreetMap.

    One column of the csv to be parsed should contain the name of the
    municipality to search for; another column may hold an alternate name that
    is tried if the first search finds nothing.

    :param csv_to_be_parsed: file to be parsed
    :param csv_parsed_successfully: file name for every successful search
        which found a municipality boundary polygon
    :param csv_parsed_unsuccessfully: file name for every csv line whose
        search did not find a municipality boundary match
    :param format: the format of the column which will contain the polygons
        of the municipality boundary
    :param search_column: the column which contains the name of the
        municipality to search for
    """
    # TODO add a handle for formatting different formats
    # TODO add an option for using a more complex csv
    # TODO add an option for google search
    csv_to_be_parsed = open(csv_to_be_parsed, 'rb')
    csv_parsed_successfully = open(csv_parsed_successfully, 'w+')
    csv_parsed_unsuccessfully = open(csv_parsed_unsuccessfully, 'w+')
    reader = unicode_csv.UnicodeReader(csv_to_be_parsed)
    writer_successfully = unicode_csv.UnicodeWriter(csv_parsed_successfully)
    writer_unsuccessfully = unicode_csv.UnicodeWriter(csv_parsed_unsuccessfully)
    writer_successfully.writerow([
        'unique_id', 'name', 'city', 'state', 'country', 'osm_id', 'lat',
        'lng', 'polygon'
    ])
    writer_unsuccessfully.writerow([
        'unique_id', 'name', 'city', 'state', 'country', 'osm_id', 'lat',
        'lng', 'polygon'
    ])
    search_string_1 = ''
    search_string_2 = ''
    unique_id = ''
    for row in reader:
        search_string_1 = row[1]
        search_string_2 = row[2]
        unique_id = row[0]
        time.sleep(.5)  # throttle requests to the OSM API
        entity_data = fetch.get_entity_data(unique_id, search_string_1)
        if entity_data:
            print entity_data
            writer_successfully.writerow(entity_data)
        # fall back to the alternate search name if the first one found nothing
        elif search_string_2:
            entity_data = fetch.get_entity_data(unique_id, search_string_2)
            if entity_data:
                print entity_data
                writer_successfully.writerow(entity_data)
            else:
                print '************* did not find : ' + search_string_1 + \
                    ' or : ' + search_string_2 + ' unique id number : ' + \
                    unique_id + ' ******************'
                writer_unsuccessfully.writerow(
                    [unique_id, search_string_1, search_string_2])
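# A minimal usage sketch for parse_csv_to_municipalities. The file names are
# hypothetical; the input csv is assumed to hold unique_id in column 0 and the
# two candidate search names in columns 1 and 2, as the loop above reads them.
parse_csv_to_municipalities('municipality_names.csv',
                            'municipalities_found.csv',
                            'municipalities_not_found.csv')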