import argparse
import functools
import multiprocessing as mp
import sys

def main():
    global DEBUG
    parser = argparse.ArgumentParser(description='Retrieve OS versions using WMI based on hostnames')
    parser.add_argument('--processes', '-n', type=int, default=5, help='Number of processes to use')
    parser.add_argument('--username', '-u', type=str, help='Username')
    parser.add_argument('--password', '-p', type=str, help='Password')
    parser.add_argument('computerlist', metavar='FILE', type=str, default='-', nargs='?')
    parser.add_argument('--debug', '-d', action='store_true', default=False)
    parser.add_argument('--output', '-o', type=str, help='Output file (default: stdout)')
    args = parser.parse_args()
    DEBUG = args.debug

    if args.debug and args.username:
        print "[DD] using username: {} and password: {}".format(args.username, args.password)

    if args.output is None:
        outfile = sys.stdout
    else:
        outfile = open(args.output, "wb")

    # Read hostnames from stdin if FILE is '-', otherwise from the given file.
    if args.computerlist == '-':
        computerlist = [x.strip() for x in sys.stdin]
    else:
        with open(args.computerlist, 'rb') as f:
            computerlist = [x.strip() for x in f]

    # Query every host in parallel.
    p = mp.Pool(args.processes)
    f = functools.partial(getOSVersion, debug=args.debug, username=args.username, password=args.password)
    res = p.map(f, computerlist)

    csvout = UnicodeDictWriter(outfile, FIXATTRS, dialect='excel-tab')
    for computer in res:
        try:
            csvout.writerow(computer)
        except Exception as e:
            print "[EE] Bummed out on row write: {} | {}".format(computer, e)
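# getOSVersion and FIXATTRS are defined elsewhere in this script and are not
# shown above. A minimal sketch of what they might look like, assuming Tim
# Golden's `wmi` module and an assumed set of Win32_OperatingSystem properties
# (the real FIXATTRS and query may differ):

import wmi  # Windows-only

FIXATTRS = ['computername', 'Caption', 'Version', 'BuildNumber']  # assumed column list

def getOSVersion(computername, debug=False, username=None, password=None):
    """Query Win32_OperatingSystem on a host; return a dict keyed by FIXATTRS."""
    result = dict.fromkeys(FIXATTRS, '')
    result['computername'] = computername
    try:
        c = wmi.WMI(computer=computername, user=username, password=password)
        osinfo = c.Win32_OperatingSystem()[0]
        for attr in FIXATTRS[1:]:
            result[attr] = getattr(osinfo, attr, '')
    except Exception as e:
        if debug:
            print "[DD] WMI query failed for {}: {}".format(computername, e)
    return result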
def source2csv(source_dir, options):
    '''Loads the collections in the given source directory.

    Arguments:
        source_dir - the relative path to the directory in which the config.yaml file is located.
    '''
    config = Config(os.path.join(source_dir, 'config.yaml'))
    logging.info('Collections in %s: %s' % (source_dir, config.collection_names()))

    for collection in config.collections():  # For each collection dir in the source dir
        coll_dir = collection.getdir()
        original_dir = os.getcwd()  # Needed to restore us to this dir after processing this collection.
        os.chdir(os.path.join(source_dir, coll_dir))

        # Create collection.csv writer
        coll_file = open('collection.csv.txt', 'w')
        coll_cols = collection.get_columns()
        coll_cols.sort()
        coll_csv = UnicodeDictWriter(coll_file, coll_cols)
        coll_csv.writer.writerow(coll_csv.fieldnames)
        coll_row = collection.get_row()
        coll_row['layer_source'] = source_dir
        coll_row['layer_collection'] = coll_dir

        # Create polygons.csv writer
        poly_file = open('collection.polygons.csv.txt', 'w')
        poly_dw = UnicodeDictWriter(poly_file, ['shapefilename', 'json'])
        poly_dw.writer.writerow(poly_dw.fieldnames)

        # Convert DBF to CSV and add to collection.csv
        shpfiles = glob.glob('*.shp')
        logging.info('Processing %d layers in %s/%s' % (len(shpfiles), source_dir, coll_dir))
        for sf in shpfiles:
            logging.info('Extracting DBF fields from %s' % sf)
            csvfile = '%s.csv' % sf
            if os.path.exists(csvfile):  # ogr2ogr barfs if there are *any* CSV files in the dir
                os.remove(csvfile)

            # On Macs that have GDAL.framework installed, autodetect ogr2ogr and use it.
            ogr2ogr_path = '/Library/Frameworks/GDAL.framework/Programs/ogr2ogr'
            if not os.path.exists(ogr2ogr_path):
                # We don't have a known path; let subprocess.call find ogr2ogr on the PATH.
                ogr2ogr_path = 'ogr2ogr'

            # TODO: optional command line option for the ogr2ogr command
            command = ogr2ogr_path + ' -f CSV "%s" "%s"' % (csvfile, sf)
            args = shlex.split(command)
            try:
                subprocess.call(args)
            except OSError as errmsg:
                logging.error("""Error occurred while executing command line '%s': %s
Please ensure that %s is executable and available on your path.""", command, errmsg, args[0])
                raise  # Re-raise the OSError exception.

            # Copy and update coll_row with DBF fields
            row = copy.copy(coll_row)
            row['layer_filename'] = os.path.splitext(sf)[0]
            dr = csv.DictReader(open(csvfile, 'r'), skipinitialspace=True)
            # Lowercase all field names for case-insensitive mapping.
            dr.fieldnames = map(lambda fn: fn.lower(), dr.fieldnames)
            layer_polygons = []

            for dbf in dr:  # For each row in the DBF CSV file (1 row per polygon)
                polygon = {}

                # Required DBF fields
                for source, mols in collection.get_mapping().iteritems():
                    # Source may be blank for required fields, which is wrong.
                    if source is None or source == '':
                        logging.error('Required field(s) %s are not mapped to any value. Please check %s/config.yaml!'
                                      % (", ".join(mols), source_dir))
                        sys.exit(1)
                    for mol in mols:
                        if unicode(source)[0] == '=':
                            # Map a DBF column to a field; field names are lowercased for case-insensitivity.
                            source_name = source[1:].lower()
                            if source_name not in dbf:
                                logging.error('Unable to map required DBF field %s to %s. Valid fieldnames include: %s.'
                                              % (source_name, mol, ", ".join(dr.fieldnames)))
                                sys.exit(1)
                            sourceval = dbf.get(source_name)
                            row[mol] = sourceval
                            polygon[mol] = sourceval
                        else:
                            # Set the value of the field directly from 'source'.
                            row[mol] = source
                            polygon[mol] = source

                # Optional DBF fields
                for source, mols in collection.get_mapping(required=False).iteritems():
                    for mol in mols:
                        # Source can be blank for optional fields, which is fine.
                        if source is None or source == '':
                            row[mol] = ''
                            polygon[mol] = ''
                        elif unicode(source)[0] == '=':
                            # Map a DBF column to a field; field names are lowercased for case-insensitivity.
                            source_name = source[1:].lower()
                            if source_name not in dbf:
                                logging.error('Unable to map optional DBF field %s to %s. Valid fieldnames include: %s.'
                                              % (source_name, mol, ", ".join(dr.fieldnames)))
                                sys.exit(1)
                            sourceval = dbf.get(source_name)
                            row[mol] = sourceval
                            polygon[mol] = sourceval
                        else:
                            # Set the value of the field directly from 'source'.
                            row[mol] = source
                            polygon[mol] = source

                # MOL-calculated fields (see issue #120) will eventually be calculated here.
                # For now, that's just 'provider', 'contributor' and 'filename'.
                row['filename'] = row['layer_filename']

                # Write the row to collection.csv
                coll_csv.writerow(row)
                layer_polygons.append(polygon)

            # Create JSON representation of the layer's polygons
            polygons_json = simplejson.dumps(layer_polygons)  # TODO: Showing up as string instead of JSON in API
            poly_dw.writerow(dict(shapefilename=row['layer_filename'], json=polygons_json))

        poly_file.flush()
        poly_file.close()  # Important: close the DictWriter file before trying to bulkload it
        logging.info('All collection metadata saved to %s' % coll_file.name)
        logging.info('All collection polygons saved to %s' % poly_file.name)
        coll_file.flush()
        coll_file.close()

        # Bulkload...
        if not options.dry_run:
            os.chdir('../../')
            filename = os.path.abspath('%s/%s/collection.csv.txt' % (source_dir, coll_dir))
            if options.config_file is None:
                logging.error("No bulkloader configuration file specified: please specify one with the --config_file option.")
                sys.exit(2)  # Exit code 2 signals a command line argument error.
            config_file = os.path.abspath(options.config_file)
            if options.localhost:
                options.url = 'http://localhost:8080/_ah/remote_api'

            # *nixes can run appcfg.py as a program without any problem. Windows, however,
            # can only run appcfg.py through the shell, so set flag_run_in_shell accordingly.
            flag_run_in_shell = (os.name == 'nt')  # True if we're running on Windows; False otherwise.

            # Bulkload Layer entities to App Engine for the entire collection
            cmd = [
                'appcfg.py', 'upload_data',
                '--config_file=%s' % config_file,
                '--filename=%s' % filename,
                '--kind=Layer',
                '--url=%s' % options.url,
                '--log_file=logs/bulkloader-log-%s' % time.strftime('%Y%m%d.%H%M%S'),
                '--db_filename=progress/bulkloader-progress-%s.sql3' % time.strftime('%Y%m%d.%H%M%S'),
            ]
            subprocess.call(cmd, shell=flag_run_in_shell)

            # Bulkload LayerIndex entities to App Engine for the entire collection
            cmd = [
                'appcfg.py', 'upload_data',
                '--config_file=%s' % config_file,
                '--filename=%s' % filename,
                '--kind=LayerIndex',
                '--url=%s' % options.url,
                '--log_file=logs/bulkloader-log-%s' % time.strftime('%Y%m%d.%H%M%S'),
                '--db_filename=progress/bulkloader-progress-%s.sql3' % time.strftime('%Y%m%d.%H%M%S'),
            ]
            subprocess.call(cmd, shell=flag_run_in_shell)

        # Go back to the original directory for the next collection.
        os.chdir(original_dir)
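# The Config and collection objects above come from a configuration module that
# is not shown here, but the mapping logic implies the shape of config.yaml:
# get_mapping() yields (source, mols) pairs, where a source starting with '='
# names a DBF column and anything else is a literal value. A hypothetical
# fragment (all key names here are assumptions, not the actual schema):
#
#   collections:
#     - directory: mammals                  # returned by collection.getdir()
#       mapping:
#         required:
#           =binomial: [scientificname]     # '=' prefix: copy from the DBF column "binomial"
#           IUCN: [provider]                # literal: used verbatim for every polygon
#         optional:
#           =citation: [bibliographiccitation]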
from lxml import etree
from lxml.cssselect import CSSSelector

from unicodewriter import UnicodeDictWriter

if __name__ == '__main__':
    # Start our list
    votings = []

    # Get the XML data
    root = etree.fromstring(open('camara.xml', 'r').read())
    # Optionally, get the XML data from a URL instead:
    # root = etree.parse('http://domain.com/source.xml').getroot()

    for voting in CSSSelector('Votacao')(root):
        thisvoting = {
            'description': voting.get('ObjVotacao'),
            'date': voting.get('Data'),
        }
        for deputado in CSSSelector('Deputado')(voting):
            row = {
                'name': deputado.get('Nome'),
                'party': deputado.get('Partido'),
                'state': deputado.get('UF'),
                'stance': deputado.get('Voto'),
            }
            row.update(thisvoting)
            votings.append(row)

    with open('pitacosdeputados.csv', 'wb') as csvfile:
        writer = UnicodeDictWriter(csvfile, ['description', 'date', 'name', 'party', 'state', 'stance'])
        for row in votings:
            writer.writerow(row)
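# All three scripts rely on UnicodeDictWriter from a shared unicodewriter
# module that is not shown. A minimal Python 2 sketch compatible with the calls
# above, including the .writer and .fieldnames attributes that source2csv uses
# to emit header rows, might look like this (a sketch, not the real module):

import csv

class UnicodeDictWriter(object):
    """Write dicts as UTF-8 encoded CSV rows (Python 2)."""

    def __init__(self, f, fieldnames, dialect='excel', encoding='utf-8', **kwds):
        self.fieldnames = fieldnames
        self.encoding = encoding
        # Exposed so callers can emit a header: w.writer.writerow(w.fieldnames)
        self.writer = csv.writer(f, dialect=dialect, **kwds)

    def writerow(self, rowdict):
        # Encode every value to a UTF-8 byte string so Python 2's csv module
        # can write it safely.
        self.writer.writerow([unicode(rowdict.get(k, '')).encode(self.encoding)
                              for k in self.fieldnames])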