def format_data_to_insert_into_db(
    usn_id, details, cve_ids, apps_data, date_posted
):
    """Build the database records for one Ubuntu security bulletin.

    One record is produced for every entry of ``apps_data``. The record id
    is derived, through ``build_bulletin_id``, from the name and version of
    every app of the entry followed by the entry's OS string.

    Args:
        usn_id (str): The Ubuntu bulletin id.
        details (str): The description of the bulletin. A byte string is
            decoded as UTF-8; text (and any non-string value) is stored
            unchanged.
        cve_ids (list): List of CVE ids.
        apps_data (list): List of dictionaries. Each one holds, under
            ``UbuntuSecurityBulletinKey.Apps``, a list of dictionaries with
            the ``'name'`` and ``'version'`` of an app and, under
            ``UbuntuSecurityBulletinKey.OsString``, the OS string.
        date_posted: The time the bulletin was posted (epoch time according
            to the callers' convention); stored unchanged.

    Returns:
        list: One dictionary per entry of ``apps_data``; an empty list when
        ``apps_data`` is empty.

    Raises:
        UnicodeDecodeError: If ``details`` is a byte string that is not
            valid UTF-8.
    """
    # Normalise the description once, before the loop: it does not depend
    # on the entry being processed. Only byte strings need decoding; text
    # is deliberately left alone, because calling .decode() on text forces
    # an implicit ASCII encode in Python 2 and fails on non-ASCII input.
    if isinstance(details, bytes):
        details = details.decode('utf-8')

    data_to_insert = []
    for data in apps_data:
        apps = data[UbuntuSecurityBulletinKey.Apps]
        os_string = data[UbuntuSecurityBulletinKey.OsString]

        # Seed of the record id: name + version of every app, then the OS.
        string_to_build_id = ''.join(
            app['name'] + app['version'] for app in apps
        ) + os_string
        bulletin_id = build_bulletin_id(string_to_build_id)

        data_to_insert.append(
            {
                UbuntuSecurityBulletinKey.Id: bulletin_id,
                UbuntuSecurityBulletinKey.BulletinId: usn_id,
                UbuntuSecurityBulletinKey.Details: details,
                UbuntuSecurityBulletinKey.DatePosted: date_posted,
                UbuntuSecurityBulletinKey.Apps: apps,
                UbuntuSecurityBulletinKey.OsString: os_string,
                UbuntuSecurityBulletinKey.CveIds: cve_ids,
            }
        )

    return data_to_insert
def _parse_supersedes(supersedes):
    """Split a "supersedes" cell into bulletin id / KB dictionaries.

    The cell is split on commas. Every item is then split on ``'['``: the
    part before it is the bulletin id and, when present, the part after it
    (minus its last character) becomes the KB, prefixed with ``'KB'``.

    Args:
        supersedes (str): The raw content of the "supersedes" cell.

    Returns:
        list: One dictionary per comma separated item, with the
        ``SupersedesBulletinId`` and ``SupersedesBulletinKb`` keys. The KB
        is None for items without a ``'['``.
    """
    superseded = []
    for item in supersedes.split(','):
        parts = item.split('[')
        bulletin_kb = None
        if len(parts) > 1:
            # Presumably items look like "ID[NUMBER]": the last character
            # (the closing bracket) is dropped before adding the prefix.
            bulletin_kb = 'KB' + parts[1][:-1]
        superseded.append(
            {
                WindowsSecurityBulletinKey.SupersedesBulletinId: parts[0],
                WindowsSecurityBulletinKey.SupersedesBulletinKb: bulletin_kb,
            }
        )
    return superseded


def parse_spread_sheet(bulletin_file):
    """Parse the Microsoft Excel bulletin data, ready for the database.

    The first row of the sheet is treated as the header and skipped. Every
    other row becomes one dictionary keyed by ``WindowsSecurityBulletinKey``
    values. Columns are addressed by position: 0 is the posting date,
    1-10 are the bulletin and component fields, and the last three columns
    used are "supersedes", "reboot" and the CVE ids (at 12-14 when the row
    has 15 columns, at 11-13 otherwise).

    Args:
        bulletin_file (str): The file location on disk.

    Returns:
        list: One dictionary per data row; an empty list when the sheet has
        no data rows.
    """
    workbook = open_workbook(bulletin_file)
    sheet = workbook.sheet_by_name(WindowsBulletinStrings.WORKBOOK_SHEET)

    # The id seed is encoded with the encoding of stdout. That attribute is
    # None when the output is not a terminal (Python 2) and may be missing
    # on replaced streams, so fall back to UTF-8 instead of crashing.
    id_encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'

    bulletin_list = []
    # Start at 1 to skip the header row; an empty sheet yields no rows.
    for i in range(1, sheet.nrows):
        row = sheet.row_values(i)

        # Numeric KB cells become "KB<number>" strings; empty cells stay ''.
        if row[7] != '':
            row[7] = 'KB' + str(int(row[7]))
        if row[2] != '':
            row[2] = 'KB' + str(int(row[2]))

        # Seed of the record id (columns 0, 5 and 10+ are not part of it).
        rows_to_use = (
            row[1] + row[2] + row[3] + row[4] +
            row[6] + row[7] + row[8] + row[9]
        )
        rows_to_use = unicode(rows_to_use).encode(id_encoding, 'replace')

        date = xldate_as_tuple(row[0], workbook.datemode)
        epoch_time = mktime(datetime(*date).timetuple())

        # Need to see if I can pull the column names and use that instead
        # of using the row number
        bulletin_dict = {
            WindowsSecurityBulletinKey.Id: build_bulletin_id(rows_to_use),
            WindowsSecurityBulletinKey.DatePosted: r.epoch_time(epoch_time),
            WindowsSecurityBulletinKey.BulletinId: row[1],
            WindowsSecurityBulletinKey.BulletinKb: row[2],
            WindowsSecurityBulletinKey.BulletinSeverity: row[3],
            WindowsSecurityBulletinKey.BulletinImpact: row[4],
            WindowsSecurityBulletinKey.Details: row[5],
            WindowsSecurityBulletinKey.AffectedProduct: row[6],
            WindowsSecurityBulletinKey.ComponentKb: row[7],
            WindowsSecurityBulletinKey.AffectedComponent: row[8],
            WindowsSecurityBulletinKey.ComponentImpact: row[9],
            WindowsSecurityBulletinKey.ComponentSeverity: row[10],
        }

        # Rows with 15 columns carry one extra column before the trailing
        # three fields, which shifts them by one position.
        offset = 12 if len(row) == 15 else 11
        supercedes = row[offset]
        reboot = row[offset + 1]
        cve_ids = row[offset + 2]

        bulletin_dict[WindowsSecurityBulletinKey.Supersedes] = (
            _parse_supersedes(supercedes)
        )
        bulletin_dict[WindowsSecurityBulletinKey.Reboot] = reboot
        bulletin_dict[WindowsSecurityBulletinKey.CveIds] = cve_ids.split(',')

        bulletin_list.append(bulletin_dict)

    return bulletin_list