def getDictionaryFromRevAuc(raw, datasource):
    """Build the standard field dictionary from one RevAuc-layout record.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position. Column 21 must hold a date formatted ``%m/%d/%Y``.
        datasource: Value stored unchanged under the ``DATASOURCE`` key.

    Returns:
        A dict keyed by the field constants (``UNITS``, ``PRICE``, ...),
        with every column value passed through ``replaceUndumpableData``.
        An empty dict is returned when the record cannot be converted; the
        failure is printed to stderr and logged.
    """
    try:
        row = raw.data
        parsed = datetime.datetime.strptime(row[21].strip(' \t\n\r'),
                                            "%m/%d/%Y")
        return {
            DATASOURCE: datasource,
            UNITS: replaceUndumpableData(row[37]),
            PRICE: replaceUndumpableData(row[38]),
            AGENCY: replaceUndumpableData(row[3]),
            VENDOR: replaceUndumpableData(row[29]),
            PSC: replaceUndumpableData(row[13]),
            DESCR: replaceUndumpableData(row[24]),
            LONGDESCR: replaceUndumpableData(row[35]),
            # TODO: this needs to be put in a standard format and sorted
            # properly.
            DATE: replaceUndumpableData(parsed.date().isoformat()),
            # Less-standard fields begin here.
            AWARDIDIDV: replaceUndumpableData(row[19]),
        }
    except Exception:
        # Conversion is best-effort: report and return an empty dict.
        # Catching Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 2
0
def getDictionaryFromRevAuc(raw, datasource):
    """Convert one RevAuc-layout record into the standard field dictionary.

    Args:
        raw: Record object exposing a positional ``data`` sequence. The
            date is read from column 21 and must match ``%m/%d/%Y`` once
            surrounding whitespace is stripped.
        datasource: Stored as-is under ``DATASOURCE``.

    Returns:
        The populated dict, or ``{}`` if any step of the conversion raises
        (the traceback goes to stderr and an error is logged).
    """
    try:
        award_date = datetime.datetime.strptime(
            raw.data[21].strip(" \t\n\r"), "%m/%d/%Y"
        ).date()
        return {
            DATASOURCE: datasource,
            UNITS: replaceUndumpableData(raw.data[37]),
            PRICE: replaceUndumpableData(raw.data[38]),
            AGENCY: replaceUndumpableData(raw.data[3]),
            VENDOR: replaceUndumpableData(raw.data[29]),
            PSC: replaceUndumpableData(raw.data[13]),
            DESCR: replaceUndumpableData(raw.data[24]),
            LONGDESCR: replaceUndumpableData(raw.data[35]),
            # TODO: this needs to be put in a standard format and sorted properly.
            DATE: replaceUndumpableData(award_date.isoformat()),
            # Less-standard fields begin here.
            AWARDIDIDV: replaceUndumpableData(raw.data[19]),
        }
    except Exception:
        # Deliberately broad (any bad record yields {}), but no longer a
        # bare except, so Ctrl-C and interpreter exit are not swallowed.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 3
0
def getDictionaryFromEDWGSAAdv(raw, datasource):
    """Build the standard field dictionary from one EDW GSAAdv-layout record.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position. Column 6 must hold a date formatted ``%b %d %Y``.
        datasource: Value stored unchanged under the ``DATASOURCE`` key.

    Returns:
        A dict of the mapped fields. ``UNITS`` is derived from column 3 via
        ``tryToInferUnitsFromDescriptionOrDefaultToOne``; both zip codes go
        through ``ensureZipCodeHasFiveDigits``. No ``PSC`` entry is produced.
        An empty dict is returned if the conversion fails.
    """
    try:
        row = raw.data
        # The original author's note: the "Charge Processing Date" is taken
        # as the official date.
        # NOTE(review): confirm that column 6 is that date.
        official = datetime.datetime.strptime(row[6].strip(' \t\n\r'),
                                              "%b %d %Y")
        return {
            DATASOURCE: datasource,
            UNITS: tryToInferUnitsFromDescriptionOrDefaultToOne(
                replaceUndumpableData(row[3])),
            PRICE: replaceUndumpableData(row[2]),
            AGENCY: replaceUndumpableData(row[9]),
            VENDOR: replaceUndumpableData(row[5]),
            # The same column feeds both the short and long description.
            DESCR: replaceUndumpableData(row[7]),
            LONGDESCR: replaceUndumpableData(row[7]),
            DATE: replaceUndumpableData(official.date().isoformat()),
            AWARDIDIDV: "GSAAdv",
            "GSA Schedule Number": replaceUndumpableData(row[12]),
            "Special Item Number": replaceUndumpableData(row[13]),
            MANUFACTURER_NAME: replaceUndumpableData(row[1]),
            MANUFACTURER_PART_NUMBER: replaceUndumpableData(row[0]),
            BUREAU: replaceUndumpableData(row[10]),
            CONTRACT_NUMBER: replaceUndumpableData(row[11]),
            TO_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(row[15])),
            FROM_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(row[14])),
        }
    except Exception:
        # Best-effort conversion: report and return an empty dict. Using
        # Exception instead of a bare except keeps KeyboardInterrupt and
        # SystemExit from being swallowed.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 4
0
def getDictionaryFromGSAAdv(raw, datasource):
    """Build the standard field dictionary from one GSAAdv-layout record.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position. Column 6 must hold a date formatted ``%b %d %Y``.
        datasource: Value stored unchanged under the ``DATASOURCE`` key.

    Returns:
        A dict of the mapped fields with ``AWARDIDIDV`` fixed to
        ``"GSA Advantage"``. No ``PSC`` entry is produced. An empty dict is
        returned if the conversion fails.
    """
    try:
        row = raw.data
        parsed = datetime.datetime.strptime(row[6].strip(' \t\n\r'),
                                            "%b %d %Y")
        return {
            DATASOURCE: datasource,
            UNITS: replaceUndumpableData(row[3]),
            PRICE: replaceUndumpableData(row[2]),
            AGENCY: replaceUndumpableData(row[10]),
            VENDOR: replaceUndumpableData(row[5]),
            # No PSC is emitted: per the original author this data does not
            # appear to contain a PSC code. The special item number is
            # exported under its own key below.
            DESCR: replaceUndumpableData(row[7]),
            LONGDESCR: replaceUndumpableData(row[8]),
            DATE: replaceUndumpableData(parsed.date().isoformat()),
            AWARDIDIDV: "GSA Advantage",
            "GSA Schedule Number": replaceUndumpableData(row[13]),
            "Special Item Number": replaceUndumpableData(row[14]),
            UNIT_OF_ISSUE: replaceUndumpableData(row[9]),
            MANUFACTURER_NAME: replaceUndumpableData(row[1]),
            MANUFACTURER_PART_NUMBER: replaceUndumpableData(row[0]),
            BUREAU: replaceUndumpableData(row[11]),
            CONTRACT_NUMBER: replaceUndumpableData(row[12]),
            TO_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(row[16])),
            FROM_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(row[15])),
        }
    except Exception:
        # Best-effort conversion: report and return an empty dict, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 5
0
def getDictionaryFromUSASpending(raw, datasource):
    """Build the standard field dictionary from one USASpending-layout record.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position. Column 14 must hold a date formatted ``%m/%d/%Y``.
        datasource: Value stored unchanged under the ``DATASOURCE`` key.

    Returns:
        A dict of the mapped fields. ``UNITS`` is derived from column 32 via
        ``tryToInferUnitsFromDescriptionOrDefaultToOne``. An empty dict is
        returned if the conversion fails.
    """
    try:
        row = raw.data
        # NOTE(review): the original comment called this the "Charge
        # Processing Date"; it looks copied from another converter.
        # Confirm what column 14 holds.
        parsed = datetime.datetime.strptime(row[14].strip(' \t\n\r'),
                                            "%m/%d/%Y")
        return {
            DATASOURCE: datasource,
            # Column 31 feeds both DESCR and LONGDESCR.
            DESCR: replaceUndumpableData(row[31]),
            UNITS: tryToInferUnitsFromDescriptionOrDefaultToOne(
                replaceUndumpableData(row[32])),
            PRICE: replaceUndumpableData(row[4]),
            AGENCY: replaceUndumpableData(row[5]),
            VENDOR: replaceUndumpableData(row[43]),
            # Column 80 is exported twice: under the standard PSC key and
            # under its source-specific name.
            PSC: replaceUndumpableData(row[80]),
            "product_service_code": replaceUndumpableData(row[80]),
            "naics_code": replaceUndumpableData(row[109]),
            LONGDESCR: replaceUndumpableData(row[31]),
            DATE: replaceUndumpableData(parsed.date().isoformat()),

            TO_ZIP_CODE: replaceUndumpableData(row[63]),
            "street_address": replaceUndumpableData(row[52]),
            "city": replaceUndumpableData(row[55]),
            "state": replaceUndumpableData(row[56]),
            "vendor_state_code": replaceUndumpableData(row[59]),
            "congressionaldistrict": replaceUndumpableData(row[58]),
            "duns_number": replaceUndumpableData(row[64]),
            "phoneno": replaceUndumpableData(row[67]),
            "extent_competed": replaceUndumpableData(row[103]),
            "reason_not_competed": replaceUndumpableData(row[104]),

            AWARDIDIDV: replaceUndumpableData("USASpending"),
        }
    except Exception:
        # Best-effort conversion: report and return an empty dict, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
def getDictionaryFromEDWGSAAdv(raw, datasource):
    """Build the standard field dictionary from one EDW GSAAdv-layout record.

    In this layout the columns are consumed in order, 0 through 23.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position. Column 5 must hold a date formatted ``%b %d %Y``.
        datasource: Not used by this converter; ``DATASOURCE`` is read from
            column 4 of the record instead.
            NOTE(review): confirm that ignoring the argument is intended.

    Returns:
        A dict of the mapped fields, or an empty dict if the conversion
        fails (the failure is printed to stderr and logged).
    """
    try:
        row = raw.data
        # The original author's note: the "Charge Processing Date" is taken
        # as the official date.
        # NOTE(review): confirm that column 5 is that date.
        official = datetime.datetime.strptime(row[5].strip(' \t\n\r'),
                                              "%b %d %Y")
        return {
            AGENCY: replaceUndumpableData(row[0]),
            AWARDIDIDV: replaceUndumpableData(row[1]),
            BUREAU: replaceUndumpableData(row[2]),
            CONTRACT_NUMBER: replaceUndumpableData(row[3]),
            DATASOURCE: replaceUndumpableData(row[4]),
            DATE: replaceUndumpableData(official.date().isoformat()),
            DESCR: replaceUndumpableData(row[6]),
            FROM_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(row[7])),
            LONGDESCR: replaceUndumpableData(row[8]),
            MANUFACTURER_NAME: replaceUndumpableData(row[9]),
            MANUFACTURER_PART_NUMBER: replaceUndumpableData(row[10]),
            PRICE: replaceUndumpableData(row[11]),
            PSC: replaceUndumpableData(row[12]),
            TO_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(row[13])),
            UNIT_OF_ISSUE: replaceUndumpableData(row[14]),
            UNITS: tryToInferUnitsFromDescriptionOrDefaultToOne(
                replaceUndumpableData(row[15])),
            VENDOR: replaceUndumpableData(row[16]),
            EXTENDED_PRICE: replaceUndumpableData(row[17]),
            PRODUCT_DESCRIPTION: replaceUndumpableData(row[18]),
            QUANTITY: replaceUndumpableData(row[19]),
            UNSPSC_CODE: replaceUndumpableData(row[20]),
            ORDERING_PROCESS: replaceUndumpableData(row[21]),
            PSC_DESCRIPTION: replaceUndumpableData(row[22]),
            DUNS_NUMBER: replaceUndumpableData(row[23]),
        }
    except Exception:
        # Best-effort conversion: report and return an empty dict, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 7
0
def getDictionaryFromStandard(raw, datasource):
    """Build the standard field dictionary from a record already in standard order.

    Columns 0-8 map, in order, to UNITS, PRICE, AGENCY, VENDOR, PSC, DESCR,
    LONGDESCR, (date, ignored) and AWARDIDIDV.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position; at least nine columns are read.
        datasource: Not used by this converter; no ``DATASOURCE`` entry is
            produced.
            NOTE(review): confirm that this is intended.

    Returns:
        A dict of the mapped fields, with ``DATE`` set to today's date in
        ISO format. An empty dict is returned if the conversion fails.
    """
    # NOTE(review): these error-level messages look like leftover debugging
    # dumps; the level and text are kept so log output is unchanged.
    logger.error('RAW:'+repr(raw.data))
    try:
        row = raw.data
        for position in range(9):
            logger.error('RAW' + str(position) + ':'
                         + replaceUndumpableData(row[position]))
        # The record's own date (column 7) is logged above but not used:
        # the original dict literal listed DATE twice and the later entry,
        # today's date, always won. The dead first entry has been removed.
        # NOTE(review): confirm whether column 7 should be parsed instead.
        today = datetime.datetime.today()
        return {
            UNITS: replaceUndumpableData(row[0]),
            PRICE: replaceUndumpableData(row[1]),
            AGENCY: replaceUndumpableData(row[2]),
            VENDOR: replaceUndumpableData(row[3]),
            PSC: replaceUndumpableData(row[4]),
            DESCR: replaceUndumpableData(row[5]),
            LONGDESCR: replaceUndumpableData(row[6]),
            # TODO: this needs to be put in a standard format and sorted
            # properly.
            DATE: replaceUndumpableData(today.date().isoformat()),
            AWARDIDIDV: replaceUndumpableData(row[8]),
        }
    except Exception:
        # Best-effort conversion: log the exception type and return an
        # empty dict. Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        logger.error("don't know what went wrong here"+repr(sys.exc_info()[0]))
        return {}
Ejemplo n.º 8
0
def getDictionaryFromLabEquipment(raw, datasource):
    """Build the standard field dictionary from one LabEquipment-layout record.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position. Column 21 must hold a date formatted ``%m/%d/%Y``.
        datasource: Value stored unchanged under the ``DATASOURCE`` key.

    Returns:
        A dict of the mapped fields. An empty dict is returned if the
        conversion fails, matching the other converters in this file
        (previously the failure path fell through and returned ``None``).
    """
    try:
        row = raw.data
        parsed = datetime.datetime.strptime(row[21].strip(' \t\n\r'),
                                            "%m/%d/%Y")
        return {
            DATASOURCE: datasource,
            UNITS: replaceUndumpableData(row[37]),
            PRICE: replaceUndumpableData(row[38]),
            AGENCY: replaceUndumpableData(row[1]),
            VENDOR: replaceUndumpableData(row[29]),
            PSC: replaceUndumpableData(row[13]),
            DESCR: replaceUndumpableData(row[35]),
            LONGDESCR: replaceUndumpableData(row[36]),
            DATE: replaceUndumpableData(parsed.date().isoformat()),
            AWARDIDIDV: replaceUndumpableData(row[19]),
            # Source-specific fields, kept under their literal names.
            "Point of Contact": replaceUndumpableData(row[2]),
            "Buyer_Division": replaceUndumpableData(row[3]),
            "Category": replaceUndumpableData(row[24]),
            "Seller Type": replaceUndumpableData(row[25]),
            "Seller Award Type": replaceUndumpableData(row[26]),
            "Purchase Description": replaceUndumpableData(row[31]),
            "Set Aside Type": replaceUndumpableData(row[32]),
            "Commodity Type": replaceUndumpableData(row[12]),
        }
    except Exception:
        # Best-effort conversion: report and return an empty dict, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 9
0
def getDictionaryFromEDWGSAAdv(raw, datasource):
    """Convert one EDW GSAAdv-layout record into the standard field dictionary.

    Columns 0 through 23 are consumed in order.

    Args:
        raw: Record object exposing a positional ``data`` sequence. The
            date is read from column 5 and must match ``%b %d %Y`` once
            surrounding whitespace is stripped.
        datasource: Not used here; ``DATASOURCE`` is taken from column 4.
            NOTE(review): confirm that ignoring the argument is intended.

    Returns:
        The populated dict, or ``{}`` if any step of the conversion raises
        (the traceback goes to stderr and an error is logged).
    """
    # The body is indented with spaces only; the previous mix of tabs and
    # spaces is rejected by Python 3 with a TabError.
    try:
        # The original author's note: the "Charge Processing Date" is taken
        # as the official date.
        # NOTE(review): confirm that column 5 is that date.
        official_date = datetime.datetime.strptime(
            raw.data[5].strip(' \t\n\r'), "%b %d %Y").date()
        return {
            AGENCY: replaceUndumpableData(raw.data[0]),
            AWARDIDIDV: replaceUndumpableData(raw.data[1]),
            BUREAU: replaceUndumpableData(raw.data[2]),
            CONTRACT_NUMBER: replaceUndumpableData(raw.data[3]),
            DATASOURCE: replaceUndumpableData(raw.data[4]),
            DATE: replaceUndumpableData(official_date.isoformat()),
            DESCR: replaceUndumpableData(raw.data[6]),
            FROM_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(raw.data[7])),
            LONGDESCR: replaceUndumpableData(raw.data[8]),
            MANUFACTURER_NAME: replaceUndumpableData(raw.data[9]),
            MANUFACTURER_PART_NUMBER: replaceUndumpableData(raw.data[10]),
            PRICE: replaceUndumpableData(raw.data[11]),
            PSC: replaceUndumpableData(raw.data[12]),
            TO_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(raw.data[13])),
            UNIT_OF_ISSUE: replaceUndumpableData(raw.data[14]),
            UNITS: tryToInferUnitsFromDescriptionOrDefaultToOne(
                replaceUndumpableData(raw.data[15])),
            VENDOR: replaceUndumpableData(raw.data[16]),
            EXTENDED_PRICE: replaceUndumpableData(raw.data[17]),
            PRODUCT_DESCRIPTION: replaceUndumpableData(raw.data[18]),
            QUANTITY: replaceUndumpableData(raw.data[19]),
            UNSPSC_CODE: replaceUndumpableData(raw.data[20]),
            ORDERING_PROCESS: replaceUndumpableData(raw.data[21]),
            PSC_DESCRIPTION: replaceUndumpableData(raw.data[22]),
            DUNS_NUMBER: replaceUndumpableData(raw.data[23]),
        }
    except Exception:
        # Deliberately broad (any bad record yields {}), but no longer a
        # bare except, so Ctrl-C and interpreter exit are not swallowed.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 10
0
def getDictionaryFromGSAAdv(raw, datasource):
    """Convert one GSAAdv-layout record into the standard field dictionary.

    Args:
        raw: Record object exposing a positional ``data`` sequence. The
            date is read from column 6 and must match ``%b %d %Y`` once
            surrounding whitespace is stripped.
        datasource: Stored as-is under ``DATASOURCE``.

    Returns:
        The populated dict (``AWARDIDIDV`` is always ``"GSA Advantage"``
        and no ``PSC`` key is set), or ``{}`` if the conversion raises.
    """
    try:
        order_date = datetime.datetime.strptime(
            raw.data[6].strip(' \t\n\r'), "%b %d %Y").date()
        return {
            DATASOURCE: datasource,
            UNITS: replaceUndumpableData(raw.data[3]),
            PRICE: replaceUndumpableData(raw.data[2]),
            AGENCY: replaceUndumpableData(raw.data[10]),
            VENDOR: replaceUndumpableData(raw.data[5]),
            # PSC is intentionally absent: the original author did not
            # believe this data carries a PSC code. The special item number
            # is exported under its own key rather than as PSC.
            DESCR: replaceUndumpableData(raw.data[7]),
            LONGDESCR: replaceUndumpableData(raw.data[8]),
            DATE: replaceUndumpableData(order_date.isoformat()),
            AWARDIDIDV: "GSA Advantage",
            "GSA Schedule Number": replaceUndumpableData(raw.data[13]),
            "Special Item Number": replaceUndumpableData(raw.data[14]),
            UNIT_OF_ISSUE: replaceUndumpableData(raw.data[9]),
            MANUFACTURER_NAME: replaceUndumpableData(raw.data[1]),
            MANUFACTURER_PART_NUMBER: replaceUndumpableData(raw.data[0]),
            BUREAU: replaceUndumpableData(raw.data[11]),
            CONTRACT_NUMBER: replaceUndumpableData(raw.data[12]),
            TO_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(raw.data[16])),
            FROM_ZIP_CODE: replaceUndumpableData(
                ensureZipCodeHasFiveDigits(raw.data[15])),
        }
    except Exception:
        # Deliberately broad (any bad record yields {}), but no longer a
        # bare except, so Ctrl-C and interpreter exit are not swallowed.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 11
0
def getDictionaryFromLabEquipment(raw, datasource):
    """Convert one LabEquipment-layout record into the standard field dictionary.

    Args:
        raw: Record object exposing a positional ``data`` sequence. The
            date is read from column 21 and must match ``%m/%d/%Y`` once
            surrounding whitespace is stripped.
        datasource: Stored as-is under ``DATASOURCE``.

    Returns:
        The populated dict, or ``{}`` if any step of the conversion raises.
        The failure path used to fall off the end of the function and
        return ``None``; it now returns ``{}`` like the sibling converters.
    """
    try:
        purchase_date = datetime.datetime.strptime(
            raw.data[21].strip(' \t\n\r'), "%m/%d/%Y").date()
        return {
            DATASOURCE: datasource,
            UNITS: replaceUndumpableData(raw.data[37]),
            PRICE: replaceUndumpableData(raw.data[38]),
            AGENCY: replaceUndumpableData(raw.data[1]),
            VENDOR: replaceUndumpableData(raw.data[29]),
            PSC: replaceUndumpableData(raw.data[13]),
            DESCR: replaceUndumpableData(raw.data[35]),
            LONGDESCR: replaceUndumpableData(raw.data[36]),
            DATE: replaceUndumpableData(purchase_date.isoformat()),
            AWARDIDIDV: replaceUndumpableData(raw.data[19]),
            # Source-specific fields, kept under their literal names.
            "Point of Contact": replaceUndumpableData(raw.data[2]),
            "Buyer_Division": replaceUndumpableData(raw.data[3]),
            "Category": replaceUndumpableData(raw.data[24]),
            "Seller Type": replaceUndumpableData(raw.data[25]),
            "Seller Award Type": replaceUndumpableData(raw.data[26]),
            "Purchase Description": replaceUndumpableData(raw.data[31]),
            "Set Aside Type": replaceUndumpableData(raw.data[32]),
            "Commodity Type": replaceUndumpableData(raw.data[12]),
        }
    except Exception:
        # Deliberately broad (any bad record yields {}), but no longer a
        # bare except, so Ctrl-C and interpreter exit are not swallowed.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 12
0
def getDictionaryFromOS2(raw, datasource):
    """Build the standard field dictionary from one OS2-layout record.

    Only a selection of the record's columns is exported; the original
    author describes them as the most salient ones.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position. Column 57 must hold a date formatted ``%m-%d-%Y``.
        datasource: Value stored unchanged under the ``DATASOURCE`` key.

    Returns:
        A dict of the mapped fields with ``AWARDIDIDV`` fixed to
        ``"GSA Schedule-75"``. An empty dict is returned if the conversion
        fails (the failure is printed to stderr and logged).
    """
    try:
        row = raw.data
        # NOTE(review): the original comment said the "Charge Processing
        # Date" is the official date, but the column parsed here (57) is
        # the one exported below as "Pay Date"; "Charge Processing Date" is
        # column 62. Confirm which is intended.
        official = datetime.datetime.strptime(row[57].strip(' \t\n\r'),
                                              "%m-%d-%Y")
        return {
            DATASOURCE: datasource,
            UNITS: replaceUndumpableData(row[16]),
            PRICE: replaceUndumpableData(row[19]),
            AGENCY: replaceUndumpableData(row[48]),
            VENDOR: replaceUndumpableData(row[64]),
            # Original author's note: all of this data is office supplies;
            # this may not be too accurate, but it matches.
            PSC: replaceUndumpableData(row[17]),
            DESCR: replaceUndumpableData(row[5]),
            # HACK: LONGDESCR reuses the DESCR column; the original author
            # suspected the OS2 data has a better field for this.
            LONGDESCR: replaceUndumpableData(row[5]),
            DATE: replaceUndumpableData(official.date().isoformat()),
            AWARDIDIDV: replaceUndumpableData("GSA Schedule-75"),
            # Less-standard fields begin here.
            "Order Number": replaceUndumpableData(row[4]),
            MANUFACTURER_NAME: replaceUndumpableData(row[7]),
            "isAbitlityOne": replaceUndumpableData(row[8]),
            MANUFACTURER_PART_NUMBER: replaceUndumpableData(row[10]),
            "Revised SubCategory": replaceUndumpableData(row[13]),
            "Revised Category": replaceUndumpableData(row[14]),
            UNIT_OF_ISSUE: replaceUndumpableData(row[15]),
            "UNSPSC": replaceUndumpableData(row[18]),
            "Revised Dbt_Crdt_ind": replaceUndumpableData(row[37]),
            "EEP_Ind": replaceUndumpableData(row[38]),
            "CPG_Ind": replaceUndumpableData(row[39]),
            "Comp_Remain_Toner": replaceUndumpableData(row[40]),
            "Post_Cons_Percent": replaceUndumpableData(row[41]),
            "Tot_Recyc_Percent": replaceUndumpableData(row[42]),
            "Dlvry_Method": replaceUndumpableData(row[43]),
            "Freight Charge": replaceUndumpableData(row[44]),
            "Shipping Weight": replaceUndumpableData(row[45]),
            "Sub_Agency1": replaceUndumpableData(row[52]),
            "Sub_Agency2": replaceUndumpableData(row[53]),
            "Sub_Agency3": replaceUndumpableData(row[54]),
            "MAJCOM": replaceUndumpableData(row[55]),
            # NOTE(review): "DODACC" and "Pay_Method" (below) both read
            # column 56; one of the two indices may be wrong.
            "DODACC": replaceUndumpableData(row[56]),
            "Pay Date": replaceUndumpableData(row[57]),
            "Charge Processing Date": replaceUndumpableData(row[62]),
            "Transaction Number": replaceUndumpableData(row[63]),
            "Revised Socio Status": replaceUndumpableData(row[66]),
            CONTRACT_NUMBER: replaceUndumpableData(row[68]),
            "Revised Ord_date": replaceUndumpableData(row[1]),
            "Report Month": replaceUndumpableData(row[3]),
            "Ord_num": replaceUndumpableData(row[4]),
            "FSC": replaceUndumpableData(row[17]),
            "Dbt_Crdt_Ind": replaceUndumpableData(row[36]),
            # "Tot_Recyc_Percent" and "Dlvry_Method" were listed a second
            # time here with the same columns; the duplicates are dropped.
            "Pay_Method": replaceUndumpableData(row[56]),
            TO_ZIP_CODE: replaceUndumpableData(row[60]),
            FROM_ZIP_CODE: replaceUndumpableData(row[61]),
        }
    except Exception:
        # Best-effort conversion: report and return an empty dict, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 13
0
def getDictionaryFromOS2(raw, datasource):
    """Convert one OS2-layout record into the standard field dictionary.

    The record has many more columns than are exported; per the original
    author only the most salient ones are selected.

    Args:
        raw: Record object exposing a positional ``data`` sequence. The
            date is read from column 57 and must match ``%m-%d-%Y`` once
            surrounding whitespace is stripped.
        datasource: Stored as-is under ``DATASOURCE``.

    Returns:
        The populated dict (``AWARDIDIDV`` is always ``"GSA Schedule-75"``),
        or ``{}`` if any step of the conversion raises.
    """
    try:
        # NOTE(review): the original comment named the "Charge Processing
        # Date" as the official date, yet column 57 is exported below as
        # "Pay Date" and column 62 as "Charge Processing Date". Confirm
        # which column should drive DATE.
        pay_date = datetime.datetime.strptime(
            raw.data[57].strip(' \t\n\r'), "%m-%d-%Y").date()
        return {
            DATASOURCE: datasource,
            UNITS: replaceUndumpableData(raw.data[16]),
            PRICE: replaceUndumpableData(raw.data[19]),
            AGENCY: replaceUndumpableData(raw.data[48]),
            VENDOR: replaceUndumpableData(raw.data[64]),
            # Original author's note: all of this data is office supplies;
            # this may not be too accurate, but it matches.
            PSC: replaceUndumpableData(raw.data[17]),
            DESCR: replaceUndumpableData(raw.data[5]),
            # HACK: same column as DESCR; the original author suspected the
            # OS2 data has a better long description.
            LONGDESCR: replaceUndumpableData(raw.data[5]),
            DATE: replaceUndumpableData(pay_date.isoformat()),
            AWARDIDIDV: replaceUndumpableData("GSA Schedule-75"),
            # Less-standard fields begin here.
            "Order Number": replaceUndumpableData(raw.data[4]),
            MANUFACTURER_NAME: replaceUndumpableData(raw.data[7]),
            "isAbitlityOne": replaceUndumpableData(raw.data[8]),
            MANUFACTURER_PART_NUMBER: replaceUndumpableData(raw.data[10]),
            "Revised SubCategory": replaceUndumpableData(raw.data[13]),
            "Revised Category": replaceUndumpableData(raw.data[14]),
            UNIT_OF_ISSUE: replaceUndumpableData(raw.data[15]),
            "UNSPSC": replaceUndumpableData(raw.data[18]),
            "Revised Dbt_Crdt_ind": replaceUndumpableData(raw.data[37]),
            "EEP_Ind": replaceUndumpableData(raw.data[38]),
            "CPG_Ind": replaceUndumpableData(raw.data[39]),
            "Comp_Remain_Toner": replaceUndumpableData(raw.data[40]),
            "Post_Cons_Percent": replaceUndumpableData(raw.data[41]),
            "Tot_Recyc_Percent": replaceUndumpableData(raw.data[42]),
            "Dlvry_Method": replaceUndumpableData(raw.data[43]),
            "Freight Charge": replaceUndumpableData(raw.data[44]),
            "Shipping Weight": replaceUndumpableData(raw.data[45]),
            "Sub_Agency1": replaceUndumpableData(raw.data[52]),
            "Sub_Agency2": replaceUndumpableData(raw.data[53]),
            "Sub_Agency3": replaceUndumpableData(raw.data[54]),
            "MAJCOM": replaceUndumpableData(raw.data[55]),
            # NOTE(review): column 56 is also exported as "Pay_Method"
            # further down; one of the two indices may be wrong.
            "DODACC": replaceUndumpableData(raw.data[56]),
            "Pay Date": replaceUndumpableData(raw.data[57]),
            "Charge Processing Date": replaceUndumpableData(raw.data[62]),
            "Transaction Number": replaceUndumpableData(raw.data[63]),
            "Revised Socio Status": replaceUndumpableData(raw.data[66]),
            CONTRACT_NUMBER: replaceUndumpableData(raw.data[68]),
            "Revised Ord_date": replaceUndumpableData(raw.data[1]),
            "Report Month": replaceUndumpableData(raw.data[3]),
            "Ord_num": replaceUndumpableData(raw.data[4]),
            "FSC": replaceUndumpableData(raw.data[17]),
            "Dbt_Crdt_Ind": replaceUndumpableData(raw.data[36]),
            # The repeated "Tot_Recyc_Percent" / "Dlvry_Method" entries
            # that used to sit here duplicated the ones above and were
            # removed; the resulting dict is unchanged.
            "Pay_Method": replaceUndumpableData(raw.data[56]),
            TO_ZIP_CODE: replaceUndumpableData(raw.data[60]),
            FROM_ZIP_CODE: replaceUndumpableData(raw.data[61]),
        }
    except Exception:
        # Deliberately broad (any bad record yields {}), but no longer a
        # bare except, so Ctrl-C and interpreter exit are not swallowed.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
def getDictionaryFromUSASpending(raw, datasource):
    """Convert one USASpending-layout record into the standard field dictionary.

    Args:
        raw: Record object exposing a positional ``data`` sequence. The
            date is read from column 14 and must match ``%m/%d/%Y`` once
            surrounding whitespace is stripped.
        datasource: Stored as-is under ``DATASOURCE``.

    Returns:
        The populated dict, or ``{}`` if any step of the conversion raises
        (the traceback goes to stderr and an error is logged).
    """
    try:
        # NOTE(review): the original comment labelled this the "Charge
        # Processing Date", which appears copied from another converter.
        # Confirm what column 14 holds.
        record_date = datetime.datetime.strptime(
            raw.data[14].strip(' \t\n\r'), "%m/%d/%Y").date()
        return {
            DATASOURCE: datasource,
            # DESCR and LONGDESCR both come from column 31.
            DESCR: replaceUndumpableData(raw.data[31]),
            UNITS: tryToInferUnitsFromDescriptionOrDefaultToOne(
                replaceUndumpableData(raw.data[32])),
            PRICE: replaceUndumpableData(raw.data[4]),
            AGENCY: replaceUndumpableData(raw.data[5]),
            VENDOR: replaceUndumpableData(raw.data[43]),
            # Column 80 appears under both the standard PSC key and its
            # source-specific name.
            PSC: replaceUndumpableData(raw.data[80]),
            "product_service_code": replaceUndumpableData(raw.data[80]),
            "naics_code": replaceUndumpableData(raw.data[109]),
            LONGDESCR: replaceUndumpableData(raw.data[31]),
            DATE: replaceUndumpableData(record_date.isoformat()),

            TO_ZIP_CODE: replaceUndumpableData(raw.data[63]),
            "street_address": replaceUndumpableData(raw.data[52]),
            "city": replaceUndumpableData(raw.data[55]),
            "state": replaceUndumpableData(raw.data[56]),
            "vendor_state_code": replaceUndumpableData(raw.data[59]),
            "congressionaldistrict": replaceUndumpableData(raw.data[58]),
            "duns_number": replaceUndumpableData(raw.data[64]),
            "phoneno": replaceUndumpableData(raw.data[67]),
            "extent_competed": replaceUndumpableData(raw.data[103]),
            "reason_not_competed": replaceUndumpableData(raw.data[104]),

            AWARDIDIDV: replaceUndumpableData("USASpending"),
        }
    except Exception:
        # Deliberately broad (any bad record yields {}), but no longer a
        # bare except, so Ctrl-C and interpreter exit are not swallowed.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 15
0
def getDictionaryFromHoustonSAP(raw, datasource):
    """Build the standard field dictionary from one HoustonSAP-layout record.

    Columns 0-28 are exported, most under their literal source names and
    some under the standard field constants.

    Args:
        raw: Record object whose ``data`` attribute is indexed by column
            position. Column 3 must hold a date formatted ``%m/%d/%Y``.
        datasource: Value stored unchanged under the ``DATASOURCE`` key.

    Returns:
        A dict of the mapped fields. ``PRICE`` is column 14 passed through
        ``convertFullFormatNumber`` and ``UNITS`` is column 15 passed through
        ``convertUnitsToClosestInteger``. An empty dict is returned if the
        conversion fails.
    """
    try:
        row = raw.data
        # DATE is taken from column 3, the same column exported below as
        # "Invoice Date".
        invoiced = datetime.datetime.strptime(row[3].strip(' \t\n\r'),
                                              "%m/%d/%Y")
        return {
            DATASOURCE: datasource,
            # Column 9 feeds both LONGDESCR and DESCR.
            LONGDESCR: replaceUndumpableData(row[9]),
            DATE: replaceUndumpableData(invoiced.date().isoformat()),
            "Invoice Id": replaceUndumpableData(row[0]),
            "Invoice Line Item Id": replaceUndumpableData(row[1]),
            "Invoice Doc Type Id - Descr": replaceUndumpableData(row[2]),
            "Invoice Date": replaceUndumpableData(row[3]),
            "Invoice Post Date": replaceUndumpableData(row[4]),
            "Fisc Year": replaceUndumpableData(row[5]),
            "Invoice Vendor Id": replaceUndumpableData(row[6]),
            VENDOR: replaceUndumpableData(row[7]),
            "Material Id": replaceUndumpableData(row[8]),
            DESCR: replaceUndumpableData(row[9]),
            "Material Group ID": replaceUndumpableData(row[10]),
            "Material Group Desc": replaceUndumpableData(row[11]),
            "Material Class ID": replaceUndumpableData(row[12]),
            "Material Class Desc": replaceUndumpableData(row[13]),
            # Column 14 is kept raw here and converted for PRICE below.
            "Invoice Line Item Amount": replaceUndumpableData(row[14]),
            PRICE: replaceUndumpableData(convertFullFormatNumber(row[14])),
            UNITS: replaceUndumpableData(
                convertUnitsToClosestInteger(row[15])),
            "Uom Id": replaceUndumpableData(row[16]),
            UNIT_OF_ISSUE: replaceUndumpableData(row[17]),
            "PO": replaceUndumpableData(row[18]),
            "PO Line": replaceUndumpableData(row[19]),
            "PO Doc Type": replaceUndumpableData(row[20]),
            "Purch Org Id": replaceUndumpableData(row[21]),
            "Purch Org Descr": replaceUndumpableData(row[22]),
            "Ext Purch Itm Catg Id - Descr": replaceUndumpableData(row[23]),
            "PO Short Text": replaceUndumpableData(row[24]),
            "PO Vendor": replaceUndumpableData(row[25]),
            AGENCY: replaceUndumpableData(row[26]),
            "PO Purch Grp": replaceUndumpableData(row[27]),
            "OA": replaceUndumpableData(row[28]),
            # AWARDIDIDV reuses column 22 ("Purch Org Descr").
            # NOTE(review): confirm that this reuse is intended.
            AWARDIDIDV: replaceUndumpableData(row[22]),
        }
    except Exception:
        # Best-effort conversion: report and return an empty dict, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}
Ejemplo n.º 16
0
def getDictionaryFromHoustonSAP(raw, datasource):
    """Convert one HoustonSAP-layout record into the standard field dictionary.

    Args:
        raw: Record object exposing a positional ``data`` sequence of at
            least 29 columns. The date is read from column 3 and must match
            ``%m/%d/%Y`` once surrounding whitespace is stripped.
        datasource: Stored as-is under ``DATASOURCE``.

    Returns:
        The populated dict, or ``{}`` if any step of the conversion raises
        (the traceback goes to stderr and an error is logged).
    """
    try:
        # Column 3 drives DATE; it is also exported verbatim below as
        # "Invoice Date".
        invoice_date = datetime.datetime.strptime(
            raw.data[3].strip(' \t\n\r'), "%m/%d/%Y").date()
        return {
            DATASOURCE: datasource,
            # LONGDESCR and DESCR share column 9.
            LONGDESCR: replaceUndumpableData(raw.data[9]),
            DATE: replaceUndumpableData(invoice_date.isoformat()),
            "Invoice Id": replaceUndumpableData(raw.data[0]),
            "Invoice Line Item Id": replaceUndumpableData(raw.data[1]),
            "Invoice Doc Type Id - Descr": replaceUndumpableData(raw.data[2]),
            "Invoice Date": replaceUndumpableData(raw.data[3]),
            "Invoice Post Date": replaceUndumpableData(raw.data[4]),
            "Fisc Year": replaceUndumpableData(raw.data[5]),
            "Invoice Vendor Id": replaceUndumpableData(raw.data[6]),
            VENDOR: replaceUndumpableData(raw.data[7]),
            "Material Id": replaceUndumpableData(raw.data[8]),
            DESCR: replaceUndumpableData(raw.data[9]),
            "Material Group ID": replaceUndumpableData(raw.data[10]),
            "Material Group Desc": replaceUndumpableData(raw.data[11]),
            "Material Class ID": replaceUndumpableData(raw.data[12]),
            "Material Class Desc": replaceUndumpableData(raw.data[13]),
            # The raw amount is kept alongside the converted PRICE.
            "Invoice Line Item Amount": replaceUndumpableData(raw.data[14]),
            PRICE: replaceUndumpableData(
                convertFullFormatNumber(raw.data[14])),
            UNITS: replaceUndumpableData(
                convertUnitsToClosestInteger(raw.data[15])),
            "Uom Id": replaceUndumpableData(raw.data[16]),
            UNIT_OF_ISSUE: replaceUndumpableData(raw.data[17]),
            "PO": replaceUndumpableData(raw.data[18]),
            "PO Line": replaceUndumpableData(raw.data[19]),
            "PO Doc Type": replaceUndumpableData(raw.data[20]),
            "Purch Org Id": replaceUndumpableData(raw.data[21]),
            "Purch Org Descr": replaceUndumpableData(raw.data[22]),
            "Ext Purch Itm Catg Id - Descr": replaceUndumpableData(raw.data[23]),
            "PO Short Text": replaceUndumpableData(raw.data[24]),
            "PO Vendor": replaceUndumpableData(raw.data[25]),
            AGENCY: replaceUndumpableData(raw.data[26]),
            "PO Purch Grp": replaceUndumpableData(raw.data[27]),
            "OA": replaceUndumpableData(raw.data[28]),
            # Same column as "Purch Org Descr".
            # NOTE(review): confirm that this reuse is intended.
            AWARDIDIDV: replaceUndumpableData(raw.data[22]),
        }
    except Exception:
        # Deliberately broad (any bad record yields {}), but no longer a
        # bare except, so Ctrl-C and interpreter exit are not swallowed.
        traceback.print_exc(limit=2, file=sys.stderr)
        logger.error("don't know what went wrong here")
        return {}