def split_index(service):
    """Split the main index CSV for *service* into one CSV file per partition.

    Partition key format: region -> term type -> product family. Each row of
    ``<service>/index.csv`` is bucketed under a key built by
    ``phelper.create_file_key``; every non-empty bucket is then written out
    to its own CSV file via ``get_index_file_name(service, key, 'csv')``.

    NOTE(review): this module defines ``split_index`` more than once; the
    last definition in the file shadows this one at import time.
    """
    indexDict = {}        # partition key -> rows destined for that file
    productFamilies = {}  # distinct 'Product Family' -> list of its 'Group' values
    partition_keys = phelper.get_partition_keys('')
    for pk in partition_keys:
        indexDict[pk] = []
    fieldnames = []
    # Text mode + newline='' is what the csv module requires in Python 3;
    # the original 'rb' mode breaks DictReader under Python 3.
    with open(get_index_file_name(service, 'index', 'csv'), 'r', newline='') as csvfile:
        pricelist = csv.DictReader(csvfile, delimiter=',', quotechar='"')
        indexRegion = ''
        x = 0  # number of rows processed
        for row in pricelist:
            indexKey = ''
            if x == 0:
                # Snapshot the header once; list() detaches it from the row dict.
                fieldnames = list(row.keys())
            if row.get('Location Type') == 'AWS Region':
                indexRegion = row['Location']
            # Data-transfer rows carry their region in 'From Location' instead.
            if row.get('Product Family') == consts.PRODUCT_FAMILY_DATA_TRANSFER:
                indexRegion = row['From Location']
            indexKey = phelper.create_file_key(indexRegion, row['TermType'], row['Product Family'])
            if indexKey in indexDict:
                indexDict[indexKey].append(row)
            # Collect the distinct product families (and their usage groups)
            # present in the index file.
            productFamily = row['Product Family']
            if productFamily not in productFamilies:
                productFamilies[productFamily] = []
            usageGroup = row.get('Group', '')  # .get: 'Group' is absent in some indexes
            if usageGroup not in productFamilies[productFamily]:
                productFamilies[productFamily].append(usageGroup)
            x += 1
    print("productFamilies:{}".format(productFamilies))
    i = 0  # number of partition files actually written
    for f in indexDict.keys():
        if indexDict[f]:
            i += 1
            print("Writing file for key: [{}]".format(f))
            with open(get_index_file_name(service, f, 'csv'), 'w', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for r in indexDict[f]:
                    writer.writerow(r)
    print("Number of files written: [{}]".format(i))
def split_index(service, region, term, **args):
    """Split the main index CSV for *service* into one CSV file per partition.

    Partition key format: region -> term type -> product family. For
    Reserved terms the key additionally includes offering class, tenancy and
    purchase option. Rows of ``<service>/index.csv`` are bucketed by the key
    returned from ``phelper.create_file_key``; each non-empty bucket is
    written to its own CSV file (fields trimmed by ``remove_fields``).

    :param service: service identifier (e.g. consts.SERVICE_EC2)
    :param region: region filter passed through to get_partition_keys
    :param term: term-type filter passed through to get_partition_keys
    :param args: extra keyword filters forwarded to get_partition_keys
    """
    indexDict = {}        # partition key -> rows destined for that file
    productFamilies = {}  # distinct 'Product Family' -> list of its 'Group' values
    # All regions and all term types (On-Demand + Reserved)
    partition_keys = phelper.get_partition_keys(service, region, term, **args)
    for pk in partition_keys:
        indexDict[pk] = []
    fieldnames = []
    # newline='' is required by the csv module in Python 3 for correct
    # newline/quoting handling.
    with open(get_index_file_name(service, 'index', 'csv'), 'r', newline='') as csvfile:
        pricelist = csv.DictReader(csvfile, delimiter=',', quotechar='"')
        indexRegion = ''
        x = 0  # number of rows processed
        for row in pricelist:
            indexKey = ''
            if x == 0:
                # Snapshot the header once; list() detaches it from the row dict.
                fieldnames = list(row.keys())
            if row.get('Location Type', '') == 'AWS Region':
                indexRegion = row['Location']
            # Data-transfer rows carry their region in 'From Location' instead.
            if row.get('Product Family', '') == consts.PRODUCT_FAMILY_DATA_TRANSFER:
                indexRegion = row['From Location']
            # Determine the index partition the current row belongs to and
            # append it to the corresponding array.
            if row.get('TermType', '') == consts.TERM_TYPE_RESERVED:
                # TODO: move the creation of the index dimensions to a common function
                if service == consts.SERVICE_EC2:
                    indexDimensions = (indexRegion, row['TermType'], row['Product Family'],
                                       row['OfferingClass'], row['Tenancy'], row['PurchaseOption'])
                elif service in (consts.SERVICE_RDS, consts.SERVICE_REDSHIFT):
                    # 'Tenancy' is not part of the RDS/Redshift index,
                    # therefore default it to Shared.
                    indexDimensions = (indexRegion, row['TermType'], row['Product Family'],
                                       row['OfferingClass'],
                                       row.get('Tenancy', consts.EC2_TENANCY_SHARED),
                                       row['PurchaseOption'])
            else:
                if service == consts.SERVICE_EC2:
                    indexDimensions = (indexRegion, row['TermType'], row['Product Family'], row['Tenancy'])
                else:
                    indexDimensions = (indexRegion, row['TermType'], row['Product Family'])
            indexKey = phelper.create_file_key(indexDimensions)
            if indexKey in indexDict:
                indexDict[indexKey].append(remove_fields(service, row))
            # Collect the distinct product families (and their usage groups)
            # present in the index file.
            productFamily = row['Product Family']
            if productFamily not in productFamilies:
                productFamilies[productFamily] = []
            usageGroup = row.get('Group', '')
            if usageGroup not in productFamilies[productFamily]:
                productFamilies[productFamily].append(usageGroup)
            x += 1
            if x % 1000 == 0:
                print("Processed row [{}]".format(x))
    print("productFamilies:{}".format(productFamilies))
    i = 0  # number of partition files actually written
    # Create csv files based on the partitions that were calculated when
    # scanning the main index.csv file.
    for f in indexDict.keys():
        if indexDict[f]:
            i += 1
            print("Writing file for key: [{}]".format(f))
            with open(get_index_file_name(service, f, 'csv'), 'w', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                        dialect='excel', quoting=csv.QUOTE_ALL)
                writer.writeheader()
                for r in indexDict[f]:
                    writer.writerow(r)
    print("Number of records in main index file: [{}]".format(x))
    print("Number of files written: [{}]".format(i))
    return
def split_index(service, region):
    """Split the main index CSV for *service* into one CSV file per partition.

    Partition key format: region -> term type -> product family; for Reserved
    EC2 terms the key additionally includes offering class, tenancy and
    purchase option. Rows of ``<service>/index.csv`` are bucketed by the key
    returned from ``phelper.create_file_key``; each non-empty bucket is
    written to its own CSV file.

    :param service: service identifier (e.g. consts.SERVICE_EC2)
    :param region: region filter (passed to get_partition_keys)
    """
    indexDict = {}        # partition key -> rows destined for that file
    productFamilies = {}  # distinct 'Product Family' -> list of its 'Group' values
    # All regions and all term types (On-Demand + Reserved)
    partition_keys = phelper.get_partition_keys(region, '')
    for pk in partition_keys:
        indexDict[pk] = []
    fieldnames = []
    # Text mode + newline='' is what the csv module requires in Python 3;
    # the original 'rb' mode breaks DictReader under Python 3.
    with open(get_index_file_name(service, 'index', 'csv'), 'r', newline='') as csvfile:
        pricelist = csv.DictReader(csvfile, delimiter=',', quotechar='"')
        indexRegion = ''
        x = 0  # number of rows processed
        for row in pricelist:
            indexKey = ''
            if x == 0:
                # Snapshot the header once; list() detaches it from the row dict.
                fieldnames = list(row.keys())
            if row.get('Location Type', '') == 'AWS Region':
                indexRegion = row['Location']
            # Data-transfer rows carry their region in 'From Location' instead.
            if row.get('Product Family', '') == consts.PRODUCT_FAMILY_DATA_TRANSFER:
                indexRegion = row['From Location']
            # Determine the index partition the current row belongs to and
            # append it to the corresponding array.
            # TODO: add support for Reserved RDS
            if row['TermType'] == consts.TERM_TYPE_RESERVED and service == consts.SERVICE_EC2:
                # TODO: move the creation of the index dimensions to a common function
                indexDimensions = (indexRegion, row['TermType'], row['Product Family'],
                                   row['OfferingClass'], row['Tenancy'], row['PurchaseOption'])
            else:
                indexDimensions = (indexRegion, row['TermType'], row['Product Family'])
            indexKey = phelper.create_file_key(indexDimensions)
            if indexKey in indexDict:
                indexDict[indexKey].append(row)
            # Collect the distinct product families (and their usage groups)
            # present in the index file.
            productFamily = row['Product Family']
            if productFamily not in productFamilies:
                productFamilies[productFamily] = []
            usageGroup = row.get('Group', '')  # .get: 'Group' is absent in some indexes
            if usageGroup not in productFamilies[productFamily]:
                productFamilies[productFamily].append(usageGroup)
            x += 1
    print("productFamilies:{}".format(productFamilies))
    i = 0  # number of partition files actually written
    # Create csv files based on the partitions that were calculated when
    # scanning the main index.csv file.
    for f in indexDict.keys():
        if indexDict[f]:
            i += 1
            print("Writing file for key: [{}]".format(f))
            with open(get_index_file_name(service, f, 'csv'), 'w', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                        dialect='excel', quoting=csv.QUOTE_ALL)
                writer.writeheader()
                for r in indexDict[f]:
                    writer.writerow(r)
    print("Number of files written: [{}]".format(i))