def test_InsertGrantForwardLeadIntoDatabase(self):
    """Insert a fake GrantForward lead and verify the stored columns.

    The fixture row is deleted in a ``finally`` clause so that a failing
    assertion cannot leave it behind in dbo.GrantForwardItems.
    """
    fakegrantforwardleadarray = [
        'keyword', 'url', 'name', 'description', 'sponsor', 'amount',
        'eligibility', 'submission info', 'categories',
        'opportunity source link', 'opportunity source text'
    ]
    InsertGrantForwardLeadsArrayIntoGrantForwardItems(fakegrantforwardleadarray)
    db = SUDBConnect()
    try:
        rows = db.getRowsDB(
            "select * from dbo.GrantForwardItems where Url='url'")
        self.assertIsNotNone(rows)
        stored = rows[0]
        self.assertEqual('keyword', stored.Keyword)
        self.assertEqual('url', stored.Url)
        self.assertEqual('name', stored.Name)
        self.assertEqual('description', stored.Description)
        self.assertEqual('sponsor', stored.Sponsor)
        # NOTE(review): the 'amount' element is never asserted here --
        # confirm whether that omission is intentional.
        self.assertEqual('eligibility', stored.Eligibility)
        self.assertEqual('submission info', stored.SubmissionInfo)
        self.assertEqual('categories', stored.Categories)
        self.assertEqual('opportunity source link', stored.OpportunitySourceLink)
        self.assertEqual('opportunity source text', stored.OpportunitySourceText)
    finally:
        # tear down: always remove the fixture row, even after a failure
        db.insertUpdateOrDeleteDB(
            "delete from dbo.GrantForwardItems where Url='url'")
def __init__(self, website, totalNewUpdated, numNewEntries, numUpdates, useDifferentWebsiteName=False):
    """Store the crawl counters and, when anything changed, log them.

    A row is inserted into dbo.CerebroLogs only when totalNewUpdated is
    greater than zero. When useDifferentWebsiteName is truthy it is written
    as the Website value instead of website.
    """
    self.website = website
    self.totalEntriesStr = str(totalNewUpdated)
    self.numNew = str(numNewEntries)
    self.numUpdates = str(numUpdates)
    self.useDifferentWebsiteName = useDifferentWebsiteName
    self.date = time.strftime('%Y%m%d')
    self.db = SUDBConnect()
    totalLeadsInTable = self.getTotalLeadsInTable()

    if not totalNewUpdated > 0:
        return

    # Both log variants differ only in the name written to the Website column.
    loggedWebsite = self.useDifferentWebsiteName if self.useDifferentWebsiteName else self.website
    columnValues = [loggedWebsite, self.date, self.numNew, self.numUpdates,
                    self.totalEntriesStr, totalLeadsInTable]
    # NOTE(review): the statement is assembled by string concatenation;
    # values containing a single quote would break it.
    self.db.insertUpdateOrDeleteDB(
        "insert into dbo.CerebroLogs (Website, Date, New, Updated, TotalNewUpdated, TotalLeads) values ('"
        + "', '".join(columnValues) + "')")
def __init__(self, scholarsiteLeadArray, fundingClassification, badScholarshipClassification):
    """Unpack a Scholarsite lead array into attributes and open the stores.

    scholarsiteLeadArray is read positionally at indexes 0 through 18, in
    the order of the assignments below.
    """
    lead = scholarsiteLeadArray
    self.scholarsiteLeadArray = lead
    self.fundingClassification = fundingClassification
    # The attribute name is spelled "Classificaion" in the original and is
    # kept unchanged because it is part of the object's attribute interface.
    self.badScholarshipClassificaion = badScholarshipClassification
    self.db = SUDBConnect()
    self.fileSystemDB = SUDBConnect(destination='filesystem')

    self.name = lead[0]
    self.amount = lead[1]
    self.deadline = lead[2]
    self.requirements = lead[3]
    self.annualAwards = lead[4]
    self.major = lead[5]
    self.academicLevel = lead[6]
    self.qualifiedMinorities = lead[7]
    self.eligibleInstitution = lead[8]
    self.eligibleRegion = lead[9]
    self.usCitizen = lead[10]
    self.usResident = lead[11]
    self.foreignNational = lead[12]
    self.minimumAge = lead[13]
    self.maximumAge = lead[14]
    self.classRank = lead[15]
    self.minimumGPA = lead[16]
    self.minimumACT = lead[17]
    self.minimumSAT = lead[18]

    # Date stamp in YYYYMMDD form, taken at construction time.
    self.date = time.strftime('%Y%m%d')
class WriteTablesToDisk(object):
    """Copy every row of a database table to the file-system store.

    All work happens in the constructor: the table's columns and rows are
    read from the database connection and each row is passed to
    ``writeFile`` on the file-system connection.
    """

    def __init__(self, tableName, user='******'):
        """Read ``dbo.<tableName>`` and write each row out.

        tableName: name of the table to dump; 'Leads' is removed from it to
            form the website name passed to ``writeFile``.
        user: user value forwarded to ``writeFile``.
        """
        self.tableName = tableName
        self.user = user
        self.website = re.sub('Leads', '', self.tableName)
        self.sqlDB = SUDBConnect(destination='database')
        self.fileDB = SUDBConnect(destination='filesystem')
        self.columns = self.sqlDB.getColumnNamesFromTable(self.tableName)
        self.rows = self.sqlDB.getRowsDB("select * from dbo.%s" % self.tableName)
        self.urls = self.getUrls()
        for row, url in zip(self.rows, self.urls):
            self.fileDB.writeFile(self.columns, row, self.user, self.website, url)

    def getUrls(self):
        """Return one identifier per row: ``Url``, else ``SourceWebsite``.

        Only a missing ``Url`` attribute triggers the fallback; any other
        error raised while reading the row propagates instead of being
        silently swallowed by a bare ``except``.
        """
        urls = []
        for row in self.rows:
            try:
                urls.append(row.Url)
            except AttributeError:
                # NOTE(review): assumes the row type raises AttributeError
                # for a missing column -- confirm for the driver in use.
                urls.append(row.SourceWebsite)
        return urls
class CerebroLogs(object):
    """Write a one-row crawl summary into dbo.CerebroLogs."""

    def __init__(self, website, totalNewUpdated, numNewEntries, numUpdates, useDifferentWebsiteName=False):
        """Store the counters and insert a log row if anything changed.

        The insert happens only when totalNewUpdated is greater than zero.
        A truthy useDifferentWebsiteName is logged in place of website.
        """
        self.website = website
        self.totalEntriesStr = str(totalNewUpdated)
        self.numNew = str(numNewEntries)
        self.numUpdates = str(numUpdates)
        self.useDifferentWebsiteName = useDifferentWebsiteName
        self.date = time.strftime('%Y%m%d')
        self.db = SUDBConnect()
        totalLeadsInTable = self.getTotalLeadsInTable()

        if not totalNewUpdated > 0:
            return

        # The two log variants differ only in the Website value.
        loggedWebsite = self.useDifferentWebsiteName if self.useDifferentWebsiteName else self.website
        columnValues = [loggedWebsite, self.date, self.numNew, self.numUpdates,
                        self.totalEntriesStr, totalLeadsInTable]
        self.db.insertUpdateOrDeleteDB(
            "insert into dbo.CerebroLogs (Website, Date, New, Updated, TotalNewUpdated, TotalLeads) values ('"
            + "', '".join(columnValues) + "')")

    def getTotalLeadsInTable(self):
        """Return, as a string, the row count of ``dbo.<website>Leads``."""
        allRows = self.db.getRowsDB("select * from dbo.%s" % ('%sLeads' % self.website))
        return str(len(allRows))
def test_normalRun(self):
    """Classify the labelled Iefa training rows and store each prediction.

    Prints the accuracy and writes every predicted tag back into the
    NormalTestPredictedTag column; nothing is asserted.
    """
    # set up: gather the inputs in a single pass over the rows
    db = SUDBConnect()
    rows = db.getRowsDB("select * from dbo.IefaLeadsTrainingItems where BadScholarship!='Maybe'")
    sponsorsList, concatenatedDescriptionOCList = [], []
    actualBad, iefaLeadTrainingIdList = [], []
    for row in rows:
        sponsorsList.append(row.Sponsor)
        concatenatedDescriptionOCList.append('%s %s' % (row.Description, row.OtherCriteria))
        actualBad.append(row.BadScholarship)
        iefaLeadTrainingIdList.append(str(row.IefaLeadTrainingId))

    # test
    classifier = ClassifyBadScholarships(sponsorsList, concatenatedDescriptionOCList)
    predictedBad = classifier.loopThroughLeadsAndDoStuff()
    print(ComputeAccuracy(actualBad, predictedBad).calculateAccuracy())

    # put result in db
    for position, predicted in enumerate(predictedBad):
        db.insertUpdateOrDeleteDB(
            "update dbo.IefaLeadsTrainingItems set NormalTestPredictedTag='" + predicted
            + "' where IefaLeadTrainingId='" + iefaLeadTrainingIdList[position] + "'")
def test_InsertPivotLeadsArrayIntoPivotLeadsDB(self):
    """Insert a fake Pivot lead and verify every stored column.

    The fixture row is deleted in a ``finally`` clause so that a failing
    assertion cannot leave it behind in dbo.PivotLeads.
    """
    fakepivotleadsarray = ['keyword', 'url', 'name', 'abstract', 'sponsor', 'amount',
                           'applicant type', 'citizenship residency', 'activity location',
                           'eligibility', 'categories', 'source website', 'source text']
    InsertPivotLeadsArrayIntoPivotLeadsDB(fakepivotleadsarray)
    db = SUDBConnect()
    try:
        rows = db.getRowsDB("select * from dbo.PivotLeads where Url='url'")
        self.assertIsNotNone(rows)
        stored = rows[0]
        self.assertEqual('keyword', stored.Keyword)
        self.assertEqual('url', stored.Url)
        self.assertEqual('name', stored.Name)
        self.assertEqual('abstract', stored.Abstract)
        self.assertEqual('sponsor', stored.Sponsor)
        self.assertEqual('amount', stored.Amount)
        self.assertEqual('applicant type', stored.ApplicantType)
        self.assertEqual('citizenship residency', stored.CitizenshipResidency)
        self.assertEqual('activity location', stored.ActivityLocation)
        self.assertEqual('eligibility', stored.Eligibility)
        self.assertEqual('categories', stored.Categories)
        self.assertEqual('source website', stored.SourceWebsite)
        self.assertEqual('source text', stored.SourceText)
    finally:
        # tear down: always remove the fixture row, even after a failure
        db.insertUpdateOrDeleteDB("delete from dbo.PivotLeads where Url='url'")
def __init__(self, unigoLeadArray, fundingClassification, badScholarshipClassification):
    """Unpack a Unigo lead array into attributes and open the stores.

    unigoLeadArray is read positionally at indexes 0 through 13. Single
    quotes are removed from the url and source-website fields.
    """
    lead = unigoLeadArray
    self.unigoLeadArray = lead
    self.fundingClassification = fundingClassification
    self.badScholarshipClassification = badScholarshipClassification
    self.db = SUDBConnect()
    self.fileSystemDB = SUDBConnect(destination='filesystem')

    self.name = lead[0]
    self.amount = lead[1]
    self.deadline = lead[2]
    # Strip single quotes from the url field.
    self.url = re.sub("'", '', lead[3])
    self.sponsor = lead[4]
    self.awardAmount = lead[5]
    self.recipients = lead[6]
    self.requirements = lead[7]
    self.additionalInfo = lead[8]
    self.contact = lead[9]
    self.address = lead[10]
    self.deadlineInformation = lead[11]
    # Strip single quotes from the source-website field.
    self.sourceWebsite = re.sub("'", '', lead[12])
    self.sourceText = lead[13]

    # Date stamp in YYYYMMDD form, taken at construction time.
    self.date = time.strftime('%Y%m%d')
def __init__(self, goodCallLeadArray, fundingClassification, badScholarshipClassification):
    """Unpack a GoodCall lead array into attributes and open the stores.

    goodCallLeadArray is read positionally at indexes 0 through 16, in the
    order of the assignments below.
    """
    fields = goodCallLeadArray
    self.goodCallLeadArray = fields
    self.fundingClassification = fundingClassification
    # The attribute name is spelled "Classificaion" in the original and is
    # kept unchanged because it is part of the object's attribute interface.
    self.badScholarshipClassificaion = badScholarshipClassification
    self.db = SUDBConnect()
    self.fileSystemDB = SUDBConnect(destination='filesystem')

    self.name = fields[0]
    self.url = fields[1]
    self.numAwards = fields[2]
    self.amount = fields[3]
    self.description = fields[4]
    self.sponsor = fields[5]
    self.classStatus = fields[6]
    self.major = fields[7]
    self.gender = fields[8]
    self.ethnicity = fields[9]
    self.grades = fields[10]
    self.testScores = fields[11]
    self.geography = fields[12]
    self.deadline = fields[13]
    self.essayInfo = fields[14]
    self.sourceWebsite = fields[15]
    self.sourceText = fields[16]

    # Date stamp in YYYYMMDD form, taken at construction time.
    self.date = time.strftime('%Y%m%d')
def __init__(self, whichHalfToDo):
    """Classify one half (or all) of the potential-scholarship rows.

    Rows are read from dbo.ClassifiedPotentialScholarships, the text and id
    lists are built, the chosen slice is run through the pretrained
    one-vs-rest model, and ``insertResultsIntoDB()`` is called.

    whichHalfToDo: 1 selects the first ceil(n / 2) items, 2 selects the
        remaining items, any other value keeps every item.
    """
    self.whichHalfToDo = whichHalfToDo
    self.db = SUDBConnect()
    self.rows = self.db.getRowsDB("select * from dbo.ClassifiedPotentialScholarships")
    self.oneVsRestPretrainedModelFile = 'OneVsRestLRTrainedClassifiers/OneVsRestLRTrainedModel'
    self.oneVsRestPretrainedFeaturesValueCountsFile = 'OneVsRestLRTrainedClassifiers/OneVsRestLRTrainedFeaturesValueCounts'
    self.dataTextList = self.getDataTextList()
    self.potentialScholarshipIdsList = self.getScholarshipIdsList()

    # Integer ceil(n / 2): avoids the float result math.ceil gives on
    # Python 2 and keeps the value usable as a slice index.
    halfOfRows = (len(self.dataTextList) + 1) // 2
    if self.whichHalfToDo == 1:
        self.dataTextList = self.dataTextList[:halfOfRows]
        self.potentialScholarshipIdsList = self.potentialScholarshipIdsList[:halfOfRows]
    elif self.whichHalfToDo == 2:
        # Slice from the split point rather than taking the last
        # ``halfOfRows`` items: with an odd count the old ``[-halfOfRows:]``
        # overlapped the first half by one element, so that item was
        # handled by both runs.
        self.dataTextList = self.dataTextList[halfOfRows:]
        self.potentialScholarshipIdsList = self.potentialScholarshipIdsList[halfOfRows:]

    self.OneVsRestClassifier = OneVsRestClassifyFromPretrainedModel(
        self.oneVsRestPretrainedModelFile,
        self.oneVsRestPretrainedFeaturesValueCountsFile,
        self.dataTextList,
        self.potentialScholarshipIdsList)
    self.predictedClassStatuses = self.OneVsRestClassifier.getPredictions()
    self.insertResultsIntoDB()
def test_InsertPivotLeadsArrayIntoPivotLeadsDB(self):
    """Insert a fake Pivot lead and verify every stored column.

    Cleanup runs in a ``finally`` clause so the fixture row is removed from
    dbo.PivotLeads even when one of the assertions fails.
    """
    fakepivotleadsarray = [
        'keyword', 'url', 'name', 'abstract', 'sponsor', 'amount',
        'applicant type', 'citizenship residency', 'activity location',
        'eligibility', 'categories', 'source website', 'source text'
    ]
    InsertPivotLeadsArrayIntoPivotLeadsDB(fakepivotleadsarray)
    db = SUDBConnect()
    try:
        rows = db.getRowsDB("select * from dbo.PivotLeads where Url='url'")
        self.assertIsNotNone(rows)
        firstRow = rows[0]
        self.assertEqual('keyword', firstRow.Keyword)
        self.assertEqual('url', firstRow.Url)
        self.assertEqual('name', firstRow.Name)
        self.assertEqual('abstract', firstRow.Abstract)
        self.assertEqual('sponsor', firstRow.Sponsor)
        self.assertEqual('amount', firstRow.Amount)
        self.assertEqual('applicant type', firstRow.ApplicantType)
        self.assertEqual('citizenship residency', firstRow.CitizenshipResidency)
        self.assertEqual('activity location', firstRow.ActivityLocation)
        self.assertEqual('eligibility', firstRow.Eligibility)
        self.assertEqual('categories', firstRow.Categories)
        self.assertEqual('source website', firstRow.SourceWebsite)
        self.assertEqual('source text', firstRow.SourceText)
    finally:
        # tear down: always remove the fixture row, even after a failure
        db.insertUpdateOrDeleteDB("delete from dbo.PivotLeads where Url='url'")
def test_infoTextOnlyNoInsert(self):
    """Run the 'infoTextOnly' classifier over the labelled Iefa rows.

    Prints the accuracy; nothing is written back to the database and
    nothing is asserted.
    """
    # set up: only the values the classifier and the scorer consume are
    # collected (the previously built id list was never read).
    db = SUDBConnect()
    rows = db.getRowsDB("select * from dbo.IefaLeadsTrainingItems where BadScholarship!='Maybe'")
    sponsorsList = []
    concatenatedDescriptionOCList = []
    actualBad = []
    for row in rows:
        sponsorsList.append(row.Sponsor)
        concatenatedDescriptionOCList.append('%s %s' % (row.Description, row.OtherCriteria))
        actualBad.append(row.BadScholarship)

    # test
    testNER = ClassifyBadScholarships(sponsorsList, concatenatedDescriptionOCList, test='infoTextOnly')
    infoTextPredictedBad = testNER.loopThroughLeadsAndDoStuff()
    accuracy = ComputeAccuracy(actualBad, infoTextPredictedBad).calculateAccuracy()
    print(accuracy)
def useOnlyFirstRegexHelperTrue(attributeId, stringToScan):
    """Match stringToScan against the first RegExHelper row returned.

    An empty pattern is used when the query returns no rows. Returns
    whatever ``Parser(...).doesMatchExist()`` returns.
    """
    # NOTE(review): attributeId is placed in the select list as a literal,
    # not in a WHERE clause, so it does not appear to filter the rows --
    # confirm the intended query.
    query = ''.join([' Select ', str(attributeId), ' , RegEx, RegExHelper from RegExHelpers'])
    rows = SUDBConnect().getRowsDB(query)
    regExHelper = rows[0].RegExHelper if len(rows) >= 1 else ''
    return Parser(stringToScan, regExHelper).doesMatchExist()
def __init__(self, iefaLeadArray, fundingClassification, badScholarshipClassification):
    """Unpack an Iefa lead array into attributes and open the stores.

    iefaLeadArray is read positionally at indexes 0 through 14. The url is
    passed through CleanText.replaceSingleQuotesWithTwoSingleQuotes.
    """
    lead = iefaLeadArray
    self.iefaLeadArray = lead
    self.fundingClassification = fundingClassification
    # The attribute name is spelled "Classificaion" in the original and is
    # kept unchanged because it is part of the object's attribute interface.
    self.badScholarshipClassificaion = badScholarshipClassification
    self.db = SUDBConnect()
    self.fileSystemDB = SUDBConnect(destination='filesystem')

    self.name = lead[0]
    self.url = CleanText.replaceSingleQuotesWithTwoSingleQuotes(lead[1])
    self.sponsor = lead[2]
    self.submissionDeadline = lead[3]
    self.majors = lead[4]
    self.amount = lead[5]
    self.description = lead[6]
    self.otherCriteria = lead[7]
    self.numberAwards = lead[8]
    self.hostInstitution = lead[9]
    self.includes = lead[10]
    self.nationalityRequired = lead[11]
    self.hostCountries = lead[12]
    self.sourceWebsite = lead[13]
    self.sourceText = lead[14]

    # Date stamp in YYYYMMDD form, taken at construction time.
    self.date = time.strftime('%Y%m%d')
def test_PutThingInTablesUpdate(self):
    """Check that a PutThingsInTables UPDATE query changes the target cell.

    A fixture row is inserted into dbo.Tests, updated through the generated
    query and read back. The row is deleted in a ``finally`` clause so a
    failing assertion cannot leave it behind.
    """
    # setup
    db = SUDBConnect()
    createTestRow = PutThingsInTables(
        'Tests', ['Regex', 'AttributeId'], ['rowToUpdate', '94']).createSQLQueryInsert()
    db.insertUpdateOrDeleteDB(createTestRow)
    try:
        # NOTE(review): a formatted string is never None, so this assertion
        # cannot fail; kept for parity with the original test.
        self.assertIsNotNone("%s" % createTestRow)
        rows = db.getRowsDB(
            "select * from dbo.Tests where Regex='rowToUpdate'")
        testRegexValue = rows[0].Regex
        self.assertEqual('rowToUpdate', testRegexValue)

        # test
        updateQuery = PutThingsInTables(
            'Tests', ['Regex'], ['thisCellWasUpdated'],
            whereColumnNames=['AttributeId'],
            whereValues=['94']).createSQLQueryUpdate()
        db.insertUpdateOrDeleteDB(updateQuery)
        rows = db.getRowsDB("select * from dbo.Tests where AttributeId='94'")
        testUpdatedValue = rows[0].Regex
        self.assertEqual('thisCellWasUpdated', testUpdatedValue)
    finally:
        # tear down: always remove the fixture row, even after a failure
        db.insertUpdateOrDeleteDB(
            "delete from dbo.Tests where AttributeId='94'")
class ScholarsiteLeadsGetDatabaseInfo(object):
    """Column-wise readers over dbo.ScholarsiteLeads."""

    def __init__(self):
        """Open the database connection used by every getter."""
        self.db = SUDBConnect()

    def getScholarsiteLeadsIds(self):
        """Return the ScholarsiteLeadId of every row, in query order."""
        return [row.ScholarsiteLeadId
                for row in self.db.getRowsDB("select * from dbo.ScholarsiteLeads")]

    def getNames(self):
        """Return the Name of every row, in query order."""
        return [row.Name
                for row in self.db.getRowsDB("select * from dbo.ScholarsiteLeads")]

    def getRequirements(self):
        """Return the Requirements of every row, in query order."""
        return [row.Requirements
                for row in self.db.getRowsDB("select * from dbo.ScholarsiteLeads")]
def getTags():
    """Return the Tag value of every row in dbo.PivotTags."""
    tagRows = SUDBConnect().getRowsDB("select Tag from dbo.PivotTags")
    return [tagRow.Tag for tagRow in tagRows]
def getTitles():
    """Return the Name value of every row in dbo.PivotTags."""
    nameRows = SUDBConnect().getRowsDB("select Name from dbo.PivotTags")
    return [nameRow.Name for nameRow in nameRows]
def __init__(self, googleLeadArray):
    """Insert one Google lead (title, link, description) into dbo.GoogleLeads.

    googleLeadArray is read positionally: index 0 is the title, 1 the link
    and 2 the description.
    """
    self.googleLeadArray = googleLeadArray
    db = SUDBConnect()
    title, link, description = (self.googleLeadArray[0],
                                self.googleLeadArray[1],
                                self.googleLeadArray[2])
    # NOTE(review): the statement is assembled by string concatenation;
    # values containing a single quote would break it.
    insertStatement = (
        "INSERT INTO dbo.GoogleLeads (GoogleLeadId, KeyTerm, Title, Link, Description, LinkBody, DateLeadGenerated, DateBodyGenerated) VALUES (0 , '', '"
        + "', '".join((title, link, description))
        + "', '', GETDATE(), GETDATE())")
    db.insertUpdateOrDeleteDB(insertStatement)
def UseOnlyFirstRegex(attributeId, stringToScan):
    """Match stringToScan against the RegEx of the first row returned.

    An empty pattern is used when the query returns no rows. Returns
    whatever ``Parser(...).doesMatchExist()`` returns.
    """
    # NOTE(review): attributeId is placed in the select list as a literal,
    # not in a WHERE clause, so it does not appear to filter the rows --
    # confirm the intended query.
    query = ''.join([' Select ', str(attributeId), ' , RegEx from RegExHelpers'])
    rows = SUDBConnect().getRowsDB(query)
    searchCriteria = rows[0].RegEx if len(rows) >= 1 else ''
    return Parser(stringToScan, searchCriteria).doesMatchExist()
def test_CreateFilePath(self):
    """createFilePath joins user, website and file name under c:\\Cerebro."""
    db = SUDBConnect()
    user = '******'
    website = 'KyasCatPage'
    fileName = 'cats.txt'
    filePath = db.createFilePath(user, website, fileName)
    # Raw string: the backslashes are literal path separators. Written as a
    # normal literal, '\C' and '\%' are invalid escape sequences, which
    # newer Python versions warn about.
    expectedFilePath = r'c:\Cerebro\%s\%s\%s' % (user, website, fileName)
    self.assertEqual(expectedFilePath, filePath)
def getEligibilities():
    """Return the Eligibility value of every row in dbo.PivotTags."""
    eligibilityRows = SUDBConnect().getRowsDB("select Eligibility from dbo.PivotTags")
    return [eligibilityRow.Eligibility for eligibilityRow in eligibilityRows]
def doInsert(GoogleLeadArray):
    """Insert one crawled link into dbo.LinkCrawlerHrefs; returns None.

    GoogleLeadArray is read positionally: index 0 is the title, 1 the url
    and 2 the description.
    """
    db = SUDBConnect()
    title, url, description = GoogleLeadArray[0], GoogleLeadArray[1], GoogleLeadArray[2]
    # NOTE(review): the statement is assembled by string concatenation;
    # values containing a single quote would break it.
    insertStatement = (
        "INSERT INTO dbo.LinkCrawlerHrefs ( QuestionId, LinkUrl, LinkName, LinkDescription, LinkBody, ProcessUsed, IsBadLink, InsertDate, UpdateDate) VALUES ( 0, '"
        + "', '".join((url, title, description))
        + "', '', 0, 0, GETDATE(), GETDATE())")
    db.insertUpdateOrDeleteDB(insertStatement)
    return None
def test_ProcessPivotLeads(self):
    """Process an 'engineering' test search; expect at least 10 stored rows."""
    leadArray = PivotLeads('engineering', isTest=True).processSearchResultsAndMakeLeadArray()
    ProcessPivotLeads(leadArray)

    storedRows = SUDBConnect().getRowsDB(
        "select * from dbo.PivotLeads where Keyword='engineering'")
    self.assertGreaterEqual(len(storedRows), 10)
def getAbstracts():
    """Return the Abstract value of every row in dbo.PivotTags."""
    abstractRows = SUDBConnect().getRowsDB("select Abstract from dbo.PivotTags")
    return [abstractRow.Abstract for abstractRow in abstractRows]
class CollegeGreenLightLeadsGetDatabaseInfo(object):
    """Column-wise readers over dbo.CollegeGreenLightLeads."""

    def __init__(self):
        """Open the database connection used by every getter."""
        self.db = SUDBConnect()

    def getDescriptions(self):
        """Return the Description of every row, in query order."""
        return [row.Description for row in self.db.getRowsDB(
            "select Description from dbo.CollegeGreenLightLeads")]

    def getSourceTexts(self):
        """Return the SourceText of every row, in query order."""
        return [row.SourceText for row in self.db.getRowsDB(
            "select SourceText from dbo.CollegeGreenLightLeads")]

    def getCollegeGreenLightLeadsIds(self):
        """Return every CollegeGreenLightLeadId converted to a string."""
        return [str(row.CollegeGreenLightLeadId) for row in self.db.getRowsDB(
            "select CollegeGreenLightLeadId from dbo.CollegeGreenLightLeads")]

    def getSponsors(self):
        """Return the Sponsor of every row, in query order."""
        return [row.Sponsor for row in self.db.getRowsDB(
            "select * from dbo.CollegeGreenLightLeads")]

    def getConcatenatedDescriptionSourceText(self):
        """Return 'description sourceText' strings, paired by position.

        The two columns come from separate queries and are matched by
        index, so the result has one entry per description.
        """
        descriptions = self.getDescriptions()
        sourceTexts = self.getSourceTexts()
        return ['%s %s' % (description, sourceTexts[position])
                for position, description in enumerate(descriptions)]
def test_InsertScholarshipArrayIntoDatabase(self):
    """Crawl one fastweb page, insert it, and read the link back by url.

    Any existing row for the page url is deleted first.
    """
    pageUrl = 'http://colleges.fastweb.com/d-foreign-languages-literatures-and-linguistics'
    db = SUDBConnect()
    db.insertUpdateOrDeleteDB(
        "delete from dbo.LinkCrawlerHrefs where linkurl='%s'" % pageUrl)

    leadArray = PullPageLinkTitleDescriptionToArray(pageUrl).doArray()
    InsertScholarshipArrayIntoDatabase.doInsert(leadArray)

    rows = db.getRowsDB(
        "select * from LinkCrawlerHrefs where linkurl='%s'" % pageUrl)
    self.assertEqual(rows[0].LinkUrl, pageUrl)
def test_writeFile(self):
    """Write a two-column fixture record through the file-system store.

    Nothing is asserted; the test only checks that writeFile does not raise.
    """
    fileStore = SUDBConnect(destination='filesystem')
    user, website = '******', 'KyasTestWebsite'
    url, date = 'www.kyastestwebsite.com', '20151204'
    columns, values = ['animal', 'name'], ['kitty', 'guen']
    fileStore.writeFile(columns, values, user, website, url, date)
def getEmptyTagAbstracts():
    """Return the Abstract of every dbo.PivotTags row whose Tag is NULL or ''."""
    untaggedRows = SUDBConnect().getRowsDB("select * from dbo.PivotTags where ISNULL(Tag, '') = ''")
    return [untaggedRow.Abstract for untaggedRow in untaggedRows]
def test_GrantForwardRunMajorsListActuallyDoIt(self):
    """Run the GrantForward majors crawl; expect at least 100 stored rows."""
    # set up
    connection = SUDBConnect()

    # run
    GrantForwardRunMajorsList()

    # test
    storedCount = len(connection.getRowsDB("select * from dbo.GrantForwardItems"))
    self.assertGreaterEqual(storedCount, 100)
def getEmptyTagAbstracts():
    """Collect the abstracts of dbo.PivotTags rows that have no tag yet.

    A row counts as untagged when its Tag is NULL or the empty string.
    """
    connection = SUDBConnect()
    return [row.Abstract for row in connection.getRowsDB(
        "select * from dbo.PivotTags where ISNULL(Tag, '') = ''")]
def test_CGRLLFileCreationWhyDoesItSuck(self):
    """Write one CollegeGreenLight record whose url carries a query string.

    Nothing is asserted; the test only checks that writeFile does not raise
    for this url.
    """
    fileStore = SUDBConnect(destination='filesystem')
    # The url deliberately keeps its '?sortBy=&reverse=false' query string.
    url = 'https://www.collegegreenlight.com/scholarship/listings/BMI-Student-Composer-Awards/-s-d-49468/?sortBy=&reverse=false'
    user, website, date = '******', 'CollegeGreenLight', '20150000'
    columns, values = ['sponsor'], ['BMI Foundation, Inc']
    fileStore.writeFile(columns, values, user, website, url, date)
def test_PivotLeadsRunMajorsListActuallyDoIt(self):
    """Run the Pivot majors crawl; expect at least 100 stored rows."""
    # set up
    connection = SUDBConnect()

    # run
    PivotLeadsRunMajorsList()

    # test
    storedCount = len(connection.getRowsDB("select * from dbo.PivotLeads"))
    self.assertGreaterEqual(storedCount, 100)
def useAllRegexHelper(attributeId, stringToScan):
    """Match stringToScan against the alternation of all distinct helpers.

    Every RegExHelper value returned by the query is de-duplicated and
    joined with '|'; with no rows the pattern is the empty string. Returns
    whatever ``Parser(...).doesMatchExist()`` returns.
    """
    # NOTE(review): attributeId is placed in the select list as a literal,
    # not in a WHERE clause, so it does not appear to filter the rows --
    # confirm the intended query.
    query = ''.join([' Select ', str(attributeId), ' , RegExHelper from RegExHelpers'])
    rows = SUDBConnect().getRowsDB(query)
    distinctHelpers = set([row.RegExHelper for row in rows]) if len(rows) >= 1 else set()
    searchCriteria = '|'.join(distinctHelpers)
    return Parser(stringToScan, searchCriteria).doesMatchExist()
class InsertUnigoLeadArrayIntoUnigoLeadsDB(object):
    """Insert or update one Unigo lead in dbo.UnigoLeads and mirror it to disk."""

    def __init__(self, unigoLeadArray, fundingClassification, badScholarshipClassification):
        """Unpack the lead array (indexes 0-13) and open both stores.

        Single quotes are removed from the url and source-website fields.
        """
        lead = unigoLeadArray
        self.unigoLeadArray = lead
        self.fundingClassification = fundingClassification
        self.badScholarshipClassification = badScholarshipClassification
        self.db = SUDBConnect()
        self.fileSystemDB = SUDBConnect(destination='filesystem')
        self.name = lead[0]
        self.amount = lead[1]
        self.deadline = lead[2]
        self.url = re.sub("'", '', lead[3])
        self.sponsor = lead[4]
        self.awardAmount = lead[5]
        self.recipients = lead[6]
        self.requirements = lead[7]
        self.additionalInfo = lead[8]
        self.contact = lead[9]
        self.address = lead[10]
        self.deadlineInformation = lead[11]
        self.sourceWebsite = re.sub("'", '', lead[12])
        self.sourceText = lead[13]
        self.date = time.strftime('%Y%m%d')

    def _selectCurrentLeadQuery(self):
        """Build the SELECT that finds this lead by Name and Url."""
        return ("select * from dbo.UnigoLeads where Name='" + self.name
                + "' and Url='" + self.url + "'")

    def checkIfAlreadyInDatabase(self):
        """Return True when a row with this Name and Url already exists."""
        return self.db.getRowsDB(self._selectCurrentLeadQuery()) != []

    def writeFileToDisk(self):
        """Read this lead's row back and write it to the file-system store."""
        tableName = 'UnigoLeads'
        columns = self.db.getColumnNamesFromTable(tableName)
        currentRow = self.db.getRowsDB(self._selectCurrentLeadQuery())[0]
        self.fileSystemDB.writeFile(columns, currentRow, '******',
                                    re.sub('Leads', '', tableName), self.url, self.date)

    def insertUpdateLead(self):
        """Insert the lead if new, otherwise update it; then write it to disk.

        Returns True when a new row was inserted and False when an existing
        row was updated.
        """
        isNewLead = not self.checkIfAlreadyInDatabase()
        if isNewLead:
            # Every value except the date is sent as an N'...' literal.
            unicodeValues = [
                self.name, self.amount, self.deadline, self.url, self.sponsor,
                self.awardAmount, self.recipients, self.requirements,
                self.additionalInfo, self.contact, self.address,
                self.deadlineInformation, self.sourceWebsite, self.sourceText,
                self.fundingClassification, self.badScholarshipClassification,
            ]
            statement = (
                "INSERT INTO dbo.UnigoLeads (Name, Amount, Deadline, Url, Sponsor, AwardAmount, Recipients, Requirements, AdditionalInfo, Contact, Address, DeadlineInformation, SourceWebsite, SourceText, Tag, BadScholarship, Date) VALUES (N'"
                + "', N'".join(unicodeValues)
                + "', '" + self.date + "')")
        else:
            # Name and Url identify the row, so they are not in the SET list.
            assignments = [
                ('Amount', self.amount),
                ('Deadline', self.deadline),
                ('Sponsor', self.sponsor),
                ('AwardAmount', self.awardAmount),
                ('Recipients', self.recipients),
                ('Requirements', self.requirements),
                ('AdditionalInfo', self.additionalInfo),
                ('Contact', self.contact),
                ('Address', self.address),
                ('DeadlineInformation', self.deadlineInformation),
                ('SourceWebsite', self.sourceWebsite),
                ('SourceText', self.sourceText),
                ('Tag', self.fundingClassification),
                ('BadScholarship', self.badScholarshipClassification),
            ]
            setClause = ", ".join(column + "=N'" + value + "'" for column, value in assignments)
            statement = ("UPDATE dbo.UnigoLeads SET " + setClause
                         + ", Date='" + self.date
                         + "' WHERE Name=N'" + self.name
                         + "' AND Url='" + self.url + "'")
        self.db.insertUpdateOrDeleteDB(statement)
        self.writeFileToDisk()
        return isNewLead
class InsertMastersInEducationArrayIntoDB(object):
    """Insert or update one lead in dbo.MastersInEducationLeads and mirror it to disk."""

    def __init__(self, mastersInEducationLeadArray, fundingClassification, badScholarshipClassification):
        """Unpack the lead array (indexes 0-5) and open both stores."""
        lead = mastersInEducationLeadArray
        self.mastersInEducationLeadArray = lead
        self.fundingClassification = fundingClassification
        # The attribute name is spelled "Classificaion" in the original and
        # is kept unchanged because it is part of the attribute interface.
        self.badScholarshipClassificaion = badScholarshipClassification
        self.db = SUDBConnect()
        self.fileSystemDB = SUDBConnect(destination='filesystem')
        self.name = lead[0]
        self.amount = lead[1]
        self.deadline = lead[2]
        self.description = lead[3]
        self.sourceWebsite = lead[4]
        self.sourceText = lead[5]
        self.date = time.strftime('%Y%m%d')

    def _selectCurrentLeadQuery(self):
        """Build the SELECT that finds this lead by Name and SourceWebsite."""
        return ("select * from dbo.MastersInEducationLeads where Name='" + self.name
                + "' and SourceWebsite='" + self.sourceWebsite + "'")

    def checkIfAlreadyInDatabase(self):
        """Return True when a row with this Name and SourceWebsite exists."""
        return self.db.getRowsDB(self._selectCurrentLeadQuery()) != []

    def writeFileToDisk(self):
        """Read this lead's row back and write it to the file-system store."""
        tableName = 'MastersInEducationLeads'
        columns = self.db.getColumnNamesFromTable(tableName)
        currentRow = self.db.getRowsDB(self._selectCurrentLeadQuery())[0]
        self.fileSystemDB.writeFile(columns, currentRow, '******',
                                    re.sub('Leads', '', tableName),
                                    self.sourceWebsite, self.date)

    def insertUpdateLead(self):
        """Insert the lead if new, otherwise update it; then write it to disk.

        Returns True when a new row was inserted and False when an existing
        row was updated.
        """
        isNewLead = not self.checkIfAlreadyInDatabase()
        if isNewLead:
            # The first six values are N'...' literals, the last three plain.
            unicodeValues = [self.name, self.deadline, self.amount,
                             self.description, self.sourceWebsite, self.sourceText]
            plainValues = [self.fundingClassification,
                           self.badScholarshipClassificaion, self.date]
            statement = (
                "INSERT INTO dbo.MastersInEducationLeads (Name, Deadline, Amount, Description, SourceWebsite, SourceText, Tag, BadScholarship, Date) VALUES (N'"
                + "', N'".join(unicodeValues)
                + "', '" + "', '".join(plainValues) + "')")
        else:
            assignments = [
                ('Deadline', self.deadline),
                ('Amount', self.amount),
                ('Description', self.description),
                ('SourceText', self.sourceText),
                ('Tag', self.fundingClassification),
                ('BadScholarship', self.badScholarshipClassificaion),
                ('Date', self.date),
            ]
            setClause = ", ".join(column + "='" + value + "'" for column, value in assignments)
            statement = ("update dbo.MastersInEducationLeads set " + setClause
                         + " where Name='" + self.name
                         + "' and SourceWebsite='" + self.sourceWebsite + "'")
        self.db.insertUpdateOrDeleteDB(statement)
        self.writeFileToDisk()
        return isNewLead
def getDefaultList():
    """Return the FastFind majors (AttributeId 417) without parentheticals.

    Each Major value is stripped of surrounding whitespace and then has
    every '(...)' group removed.
    """
    db = SUDBConnect()
    rows = db.getRowsDB(
        "select replace( ValueShown, '(' + OtherValuesToCheck + ')', '') as Major from dbo.FastFindLists where AttributeId=417")
    # Raw string: '\(' in a normal literal is an invalid escape sequence.
    # Compiled once instead of being looked up for every row.
    parenthetical = re.compile(r'\(.*?\)')
    # NOTE(review): strip() runs before the parenthetical is removed, so a
    # space that preceded '(' is left at the end -- confirm this is intended.
    return [parenthetical.sub('', row.Major.strip()) for row in rows]
def test_readFile(self):
    """Read the KyasTestWebsite fixture file back and compare its contents."""
    fileStore = SUDBConnect(destination='filesystem')
    user, website = '******', 'KyasTestWebsite'
    url, date = 'www.kyastestwebsite.com', '20151204'

    filePath = fileStore.createFilePath(user, website, url, date)
    columns, values = fileStore.readFileGetColumnsAndData(filePath)

    self.assertEqual(['animal', 'name'], columns)
    self.assertEqual(['kitty', 'guen'], values)
class InsertScholarsiteLeadsArrayIntoScholarsiteLeadsDB(object):
    """Insert or update one lead in dbo.ScholarsiteLeads.

    The lead array is read positionally (indexes 0-18) in the order
    name, amount, deadline, requirements, annualAwards, major,
    academicLevel, qualifiedMinorities, eligibleInstitution,
    eligibleRegion, usCitizen, usResident, foreignNational, minimumAge,
    maximumAge, classRank, minimumGPA, minimumACT, minimumSAT.
    A row is identified by Name alone. After every insert or update the
    stored row is read back and handed to the filesystem connection's
    writeFile().

    NOTE(review): values are concatenated into the SQL text unescaped, so
    a value containing a single quote produces a malformed statement.
    Consider parameterized queries if SUDBConnect supports them.
    """

    def __init__(self, scholarsiteLeadArray, fundingClassification, badScholarshipClassification):
        self.scholarsiteLeadArray = scholarsiteLeadArray
        self.fundingClassification = fundingClassification
        # The attribute spelling is kept as-is; other code may read it.
        self.badScholarshipClassificaion = badScholarshipClassification
        self.db = SUDBConnect()
        self.fileSystemDB = SUDBConnect(destination='filesystem')
        self.name = self.scholarsiteLeadArray[0]
        self.amount = self.scholarsiteLeadArray[1]
        self.deadline = self.scholarsiteLeadArray[2]
        self.requirements = self.scholarsiteLeadArray[3]
        self.annualAwards = self.scholarsiteLeadArray[4]
        self.major = self.scholarsiteLeadArray[5]
        self.academicLevel = self.scholarsiteLeadArray[6]
        self.qualifiedMinorities = self.scholarsiteLeadArray[7]
        self.eligibleInstitution = self.scholarsiteLeadArray[8]
        self.eligibleRegion = self.scholarsiteLeadArray[9]
        self.usCitizen = self.scholarsiteLeadArray[10]
        self.usResident = self.scholarsiteLeadArray[11]
        self.foreignNational = self.scholarsiteLeadArray[12]
        self.minimumAge = self.scholarsiteLeadArray[13]
        self.maximumAge = self.scholarsiteLeadArray[14]
        self.classRank = self.scholarsiteLeadArray[15]
        self.minimumGPA = self.scholarsiteLeadArray[16]
        self.minimumACT = self.scholarsiteLeadArray[17]
        self.minimumSAT = self.scholarsiteLeadArray[18]
        self.date = time.strftime('%Y%m%d')

    def checkIfAlreadyInDB(self):
        """Return True when a row with this Name already exists."""
        matchingRow = self.db.getRowsDB(
            "select * from dbo.ScholarsiteLeads where Name='" + self.name + "'")
        return matchingRow != []

    def writeFileToDisk(self):
        """Read the stored row back and pass it to the filesystem writer."""
        tableName = 'ScholarsiteLeads'
        user = '******'
        website = re.sub('Leads', '', tableName)
        columns = self.db.getColumnNamesFromTable(tableName)
        currentRow = self.db.getRowsDB(
            "select * from dbo.ScholarsiteLeads where Name='" + self.name + "'")[0]
        # An empty string is passed in the position where the sibling
        # lead classes pass their source website.
        self.fileSystemDB.writeFile(columns, currentRow, user, website, '', self.date)

    def insertUpdateLead(self):
        """Insert the lead if it is new, otherwise update the existing row.

        Returns:
            True when a new row was inserted, False when an existing row
            was updated.
        """
        if not self.checkIfAlreadyInDB():
            self.db.insertUpdateOrDeleteDB(
                "insert into dbo.ScholarsiteLeads (Name, Amount, Deadline, Requirements, AnnualAwards, Major, AcademicLevel, QualifiedMinorities, EligibleInstitution, EligibleRegion, USCitizen, USResident, ForeignNational, MinimumAge, MaximumAge, ClassRank, MinimumGPA, MinimumACT, MinimumSAT, Tag, BadScholarship, Date) values (N'"
                + self.name + "', N'"
                + self.amount + "', '"
                + self.deadline + "', N'"
                + self.requirements + "', N'"
                + self.annualAwards + "', N'"
                + self.major + "', N'"
                + self.academicLevel + "', N'"
                + self.qualifiedMinorities + "', N'"
                + self.eligibleInstitution + "', N'"
                + self.eligibleRegion + "', N'"
                + self.usCitizen + "', N'"
                + self.usResident + "', N'"
                + self.foreignNational + "', '"
                + self.minimumAge + "', '"
                + self.maximumAge + "', N'"
                + self.classRank + "', N'"
                + self.minimumGPA + "', N'"
                + self.minimumACT + "', N'"
                + self.minimumSAT + "', N'"
                + self.fundingClassification + "', '"
                + self.badScholarshipClassificaion + "', '"
                + self.date + "')")
            self.writeFileToDisk()
            return True

        self.db.insertUpdateOrDeleteDB(
            "update dbo.ScholarsiteLeads set Amount=N'" + self.amount
            + "', Deadline=N'" + self.deadline
            + "', Requirements=N'" + self.requirements
            + "', AnnualAwards='" + self.annualAwards
            + "', Major='" + self.major
            + "', AcademicLevel=N'" + self.academicLevel
            + "', QualifiedMinorities=N'" + self.qualifiedMinorities
            + "', EligibleInstitution=N'" + self.eligibleInstitution
            + "', EligibleRegion=N'" + self.eligibleRegion
            + "', USCitizen='" + self.usCitizen
            + "', USResident='" + self.usResident
            + "', ForeignNational='" + self.foreignNational
            + "', MinimumAge='" + self.minimumAge
            + "', MaximumAge='" + self.maximumAge
            + "', ClassRank='" + self.classRank
            + "', MinimumGPA='" + self.minimumGPA
            + "', MinimumACT='" + self.minimumACT
            + "', MinimumSAT='" + self.minimumSAT
            + "', Tag='" + self.fundingClassification
            + "', BadScholarship='" + self.badScholarshipClassificaion
            + "', Date='" + self.date
            + "' where Name='" + self.name + "'")
        # Refresh the on-disk copy exactly once, as the insert branch does.
        # Calling insertUpdateLead() here would never terminate, because
        # the row still exists after the update.
        self.writeFileToDisk()
        return False
def __init__(self, teacherDotOrgLeadArray, fundingClassification, badScholarshipClassification):
    """Capture one TeacherDotOrg lead and open the connections used to store it.

    The lead array is read positionally as
    [name, description, requirements, sourceWebsite, sourceText].
    """
    self.teacherDotOrgLeadArray = teacherDotOrgLeadArray
    self.fundingClassification = fundingClassification
    self.badScholarshipClassification = badScholarshipClassification

    # The connections are created before the array is indexed.
    self.db = SUDBConnect()
    self.fileSystemDB = SUDBConnect(destination='filesystem')

    leadFields = ('name', 'description', 'requirements', 'sourceWebsite', 'sourceText')
    for position, fieldName in enumerate(leadFields):
        setattr(self, fieldName, teacherDotOrgLeadArray[position])

    # Date stamp in YYYYMMDD form, taken at construction time.
    self.date = time.strftime('%Y%m%d')
def __init__(self, fatomeiLeadArray, fundingClassification, badScholarshipClassification):
    """Capture one Fatomei lead and open the connections used to store it.

    The lead array is read positionally as
    [name, description, dueDate, sourceWebsite, sourceText].
    """
    self.badScholarshipClassification = badScholarshipClassification
    self.fundingClassification = fundingClassification
    self.fatomeiLeadArray = fatomeiLeadArray

    # The connections are created before the array is indexed.
    self.db = SUDBConnect()
    self.fileSystemDB = SUDBConnect(destination='filesystem')

    lead = fatomeiLeadArray
    self.name = lead[0]
    self.description = lead[1]
    self.dueDate = lead[2]
    self.sourceWebsite = lead[3]
    self.sourceText = lead[4]

    # Date stamp in YYYYMMDD form, taken at construction time.
    self.date = time.strftime('%Y%m%d')
def __init__(self, tableName, user='******'):
    """Load every row of dbo.<tableName> and write each one via the filesystem connection.

    All of the work happens at construction time. The column names and
    rows are read from the 'database' connection, then each row is paired
    with the matching entry from self.getUrls() and passed to writeFile()
    on the 'filesystem' connection.
    """
    self.tableName = tableName
    self.user = user
    # The website name is the table name with every 'Leads' removed.
    self.website = re.sub('Leads', '', self.tableName)

    self.sqlDB = SUDBConnect(destination='database')
    self.fileDB = SUDBConnect(destination='filesystem')

    self.columns = self.sqlDB.getColumnNamesFromTable(self.tableName)
    self.rows = self.sqlDB.getRowsDB("select * from dbo.%s" % self.tableName)
    self.urls = self.getUrls()

    # zip() stops at the shorter sequence, so rows without a matching
    # url (or urls without a row) are skipped.
    for rowValues, sourceUrl in zip(self.rows, self.urls):
        self.fileDB.writeFile(self.columns, rowValues, self.user, self.website, sourceUrl)
def getKeywords(tag=None):
    """Return the distinct Keyword values stored in dbo.PivotLeads.

    Args:
        tag: when truthy, only rows whose Tag equals this value are
            considered; otherwise every row is considered.

    Returns:
        list of the Keyword attribute of each returned row.
    """
    db = SUDBConnect()
    query = "select distinct Keyword from dbo.PivotLeads"
    if tag:
        query = query + " where Tag='" + tag + "'"
    return [row.Keyword for row in db.getRowsDB(query)]
def useOnlyFirstRegexAndRegexHelper(attributeId, stringToScan):
    """Report whether the first RegEx and RegExHelper both match stringToScan.

    Only the first row returned from RegExHelpers is used. Returns False
    when the query yields no rows or when either pattern has no match.

    NOTE(review): attributeId is placed in the select list as a literal
    and the query has no WHERE clause, so it does not appear to filter
    the rows. Confirm whether a filter on the attribute was intended.
    """
    helperRows = SUDBConnect().getRowsDB(
        ' Select ' + str(attributeId) + ' , RegEx, RegExHelper from RegExHelpers')
    if len(helperRows) < 1:
        return False

    firstRegex = helperRows[0].RegEx
    firstRegexHelper = helperRows[0].RegExHelper

    # The helper pattern is only evaluated when the main pattern matches.
    if not Parser(stringToScan, firstRegex).doesMatchExist():
        return False
    if not Parser(stringToScan, firstRegexHelper).doesMatchExist():
        return False
    return True