def amazonInstablePatternSearchTest(createTestFile=False):
    """
    This pattern is looking for an unstable day for Amazon.
    PATTERN SEQ(AmazonStockPriceUpdate x1, AmazonStockPriceUpdate x2, AmazonStockPriceUpdate x3)
    WHERE x1.LowestPrice <= 75 AND x2.PeakPrice >= 78 AND x3.LowestPrice <= x1.LowestPrice
    WITHIN 1 day
    """
    def lowest(name):
        # Fresh term reading "Lowest Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Lowest Price"])

    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = SeqOperator(
        [QItem("AMZN", "x1"), QItem("AMZN", "x2"), QItem("AMZN", "x3")])

    first_is_low = SmallerThanEqFormula(lowest("x1"), AtomicTerm(75))
    second_is_high = GreaterThanEqFormula(peak("x2"), AtomicTerm(78))
    third_not_above_first = SmallerThanEqFormula(lowest("x3"), lowest("x1"))
    condition = AndFormula(
        first_is_low,
        AndFormula(second_is_high, third_not_above_first))

    pattern = Pattern(structure, condition, timedelta(days=1))
    runTest('amazonInstable', [pattern], createTestFile)
def multiplePatternSearchTest(createTestFile=False):
    """
    Runs two patterns in a single test.

    PATTERN SEQ(AMZN x1, AMZN x2, AMZN x3)
    WHERE x1.LowestPrice <= 75 AND x2.PeakPrice >= 78 AND x3.LowestPrice <= x1.LowestPrice
    WITHIN 1 day

    PATTERN SEQ(GOOG a, GOOG b, GOOG c)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
    WITHIN 3 minutes
    """
    def lowest(name):
        # Fresh term reading "Lowest Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Lowest Price"])

    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    # First pattern: the Amazon sequence over x1, x2, x3.
    amazon_structure = SeqOperator(
        [QItem("AMZN", "x1"), QItem("AMZN", "x2"), QItem("AMZN", "x3")])
    x1_is_low = SmallerThanEqFormula(lowest("x1"), AtomicTerm(75))
    x2_is_high = GreaterThanEqFormula(peak("x2"), AtomicTerm(78))
    x3_not_above_x1 = SmallerThanEqFormula(lowest("x3"), lowest("x1"))
    amazon_condition = AndFormula(
        x1_is_low, AndFormula(x2_is_high, x3_not_above_x1))
    amazon_pattern = Pattern(
        amazon_structure, amazon_condition, timedelta(days=1))

    # Second pattern: the Google sequence over a, b, c.
    google_structure = SeqOperator(
        [QItem("GOOG", "a"), QItem("GOOG", "b"), QItem("GOOG", "c")])
    a_below_b = SmallerThanFormula(peak("a"), peak("b"))
    b_below_c = SmallerThanFormula(peak("b"), peak("c"))
    google_pattern = Pattern(
        google_structure, AndFormula(a_below_b, b_below_c),
        timedelta(minutes=3))

    runTest('multiplePatterns', [amazon_pattern, google_pattern],
            createTestFile)
def msftDrivRacePatternSearchTest(createTestFile=False):
    """
    This pattern is looking for a race between driv and microsoft in ten minutes
    PATTERN SEQ(MicrosoftStockPriceUpdate a, DrivStockPriceUpdate b, MicrosoftStockPriceUpdate c,
                DrivStockPriceUpdate d, MicrosoftStockPriceUpdate e)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
      AND c.PeakPrice < d.PeakPrice AND d.PeakPrice < e.PeakPrice
    WITHIN 10 minutes
    """
    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = SeqOperator([
        QItem("MSFT", "a"),
        QItem("DRIV", "b"),
        QItem("MSFT", "c"),
        QItem("DRIV", "d"),
        QItem("MSFT", "e"),
    ])

    # The four comparisons are grouped pairwise, exactly as nested below.
    first_pair = AndFormula(
        SmallerThanFormula(peak("a"), peak("b")),
        SmallerThanFormula(peak("b"), peak("c")))
    second_pair = AndFormula(
        SmallerThanFormula(peak("c"), peak("d")),
        SmallerThanFormula(peak("d"), peak("e")))
    condition = AndFormula(first_pair, second_pair)

    pattern = Pattern(structure, condition, timedelta(minutes=10))
    runTest('msftDrivRace', [pattern], createTestFile)
def nonFrequencyPatternSearch2Test(createTestFile=False):
    """
    PATTERN SEQ(LOCM a, AMZN b, AAPL c)
    WHERE a.OpeningPrice < b.OpeningPrice AND b.OpeningPrice < c.OpeningPrice
    WITHIN 5 minutes

    No statistics are set and runTest is called without an explicit
    evaluation mechanism.
    """
    def opening(name):
        # Fresh term reading "Opening Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Opening Price"])

    structure = SeqOperator(
        [QItem("LOCM", "a"), QItem("AMZN", "b"), QItem("AAPL", "c")])
    a_below_b = SmallerThanFormula(opening("a"), opening("b"))
    b_below_c = SmallerThanFormula(opening("b"), opening("c"))
    window = timedelta(minutes=5)

    search = Pattern(structure, AndFormula(a_below_b, b_below_c), window)
    runTest("nonFrequency2", [search], createTestFile)
def googleIncreasePatternSearchTest(createTestFile=False):
    """
    This pattern is looking for a 1% increase in the google stock in a half-hour.
    PATTERN SEQ(GoogleStockPriceUpdate a, GoogleStockPriceUpdate b)
    WHERE b.PeakPrice >= 1.01 * a.PeakPrice
    WITHIN 30 minutes
    """
    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = SeqOperator([QItem("GOOG", "a"), QItem("GOOG", "b")])

    later_peak = peak("b")
    # 1.01 times the earlier peak price.
    raised_earlier_peak = MulTerm(AtomicTerm(1.01), peak("a"))
    condition = GreaterThanEqFormula(later_peak, raised_earlier_peak)

    pattern = Pattern(structure, condition, timedelta(minutes=30))
    runTest('googleIncrease', [pattern], createTestFile)
def arrivalRatesPatternSearchTest(createTestFile=False):
    """
    PATTERN SEQ(AAPL a, AMZN b, LOCM c)
    WHERE a.OpeningPrice > b.OpeningPrice AND b.OpeningPrice > c.OpeningPrice
    WITHIN 5 minutes

    ARRIVAL_RATES statistics are attached to the pattern, and
    SORT_BY_FREQUENCY_LEFT_DEEP_TREE is handed to runTest as its fourth
    positional argument.
    """
    def opening(name):
        # Fresh term reading "Opening Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Opening Price"])

    structure = SeqOperator(
        [QItem("AAPL", "a"), QItem("AMZN", "b"), QItem("LOCM", "c")])
    a_above_b = GreaterThanFormula(opening("a"), opening("b"))
    b_above_c = GreaterThanFormula(opening("b"), opening("c"))

    search = Pattern(
        structure, AndFormula(a_above_b, b_above_c), timedelta(minutes=5))

    rates = [0.0159, 0.0153, 0.0076]
    search.set_statistics(StatisticsTypes.ARRIVAL_RATES, rates)

    runTest(
        "arrivalRates",
        [search],
        createTestFile,
        EvaluationMechanismTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE,
    )
def googleAmazonLowPatternSearchTest(createTestFile=False):
    """
    This pattern is looking for low prices of Amazon and Google at the same minute.
    PATTERN AND(AmazonStockPriceUpdate a, GoogleStockPriceUpdate g)
    WHERE a.PeakPrice <= 73 AND g.PeakPrice <= 525
    WITHIN 1 minute
    """
    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = AndOperator([QItem("AMZN", "a"), QItem("GOOG", "g")])

    amazon_is_low = SmallerThanEqFormula(peak("a"), AtomicTerm(73))
    google_is_low = SmallerThanEqFormula(peak("g"), AtomicTerm(525))
    condition = AndFormula(amazon_is_low, google_is_low)

    pattern = Pattern(structure, condition, timedelta(minutes=1))
    runTest('googleAmazonLow', [pattern], createTestFile)
def amazonSpecificPatternSearchTest(createTestFile=False):
    """
    This pattern is looking for an amazon stock in peak price of 73.
    PATTERN SEQ(AmazonStockPriceUpdate a)
    WHERE a.PeakPrice == 73
    (no time window argument is passed to Pattern)
    """
    structure = SeqOperator([QItem("AMZN", "a")])
    peak_of_a = IdentifierTerm("a", lambda event: event["Peak Price"])
    condition = EqFormula(peak_of_a, AtomicTerm(73))

    # Pattern is deliberately built from two arguments only.
    pattern = Pattern(structure, condition)
    runTest('amazonSpecific', [pattern], createTestFile)
def nonFrequencyTailoredPatternSearchTest(createTestFile=False):
    """
    PATTERN SEQ(DRIV a, MSFT b, CBRL c)
    WHERE a.OpeningPrice > b.OpeningPrice AND b.OpeningPrice > c.OpeningPrice
    WITHIN timedelta.max

    Runs with TRIVIAL_LEFT_DEEP_TREE over nasdaqEventStream; no statistics
    are set on the pattern.
    """
    def opening(name):
        # Fresh term reading "Opening Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Opening Price"])

    structure = SeqOperator(
        [QItem("DRIV", "a"), QItem("MSFT", "b"), QItem("CBRL", "c")])
    a_above_b = GreaterThanFormula(opening("a"), opening("b"))
    b_above_c = GreaterThanFormula(opening("b"), opening("c"))
    condition = AndFormula(a_above_b, b_above_c)

    search = Pattern(structure, condition, timedelta.max)
    runTest(
        'nonFrequencyTailored1',
        [search],
        createTestFile,
        eval_mechanism_type=EvaluationMechanismTypes.TRIVIAL_LEFT_DEEP_TREE,
        events=nasdaqEventStream,
    )
def hierarchyPatternSearchTest(createTestFile=False):
    """
    The following pattern is looking for Amazon < Apple < Google cases in one minute windows.
    PATTERN AND(AmazonStockPriceUpdate a, AppleStockPriceUpdate b, GoogleStockPriceUpdate c)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
    WITHIN 1 minute
    """
    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = AndOperator(
        [QItem("AMZN", "a"), QItem("AAPL", "b"), QItem("GOOG", "c")])

    amazon_below_apple = SmallerThanFormula(peak("a"), peak("b"))
    apple_below_google = SmallerThanFormula(peak("b"), peak("c"))
    condition = AndFormula(amazon_below_apple, apple_below_google)

    pattern = Pattern(structure, condition, timedelta(minutes=1))
    runTest('hierarchy', [pattern], createTestFile)
def frequencyPatternSearch2Test(createTestFile=False):
    """
    PATTERN SEQ(LOCM a, AMZN b, AAPL c)
    WHERE a.OpeningPrice < b.OpeningPrice AND b.OpeningPrice < c.OpeningPrice
    WITHIN 5 minutes

    FREQUENCY_DICT statistics are attached to the pattern, and
    SORT_BY_FREQUENCY_LEFT_DEEP_TREE is handed to runTest as its fourth
    positional argument.
    """
    def opening(name):
        # Fresh term reading "Opening Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Opening Price"])

    structure = SeqOperator(
        [QItem("LOCM", "a"), QItem("AMZN", "b"), QItem("AAPL", "c")])
    a_below_b = SmallerThanFormula(opening("a"), opening("b"))
    b_below_c = SmallerThanFormula(opening("b"), opening("c"))

    search = Pattern(
        structure, AndFormula(a_below_b, b_below_c), timedelta(minutes=5))

    frequencies = {"AAPL": 2, "AMZN": 3, "LOCM": 1}
    search.set_statistics(StatisticsTypes.FREQUENCY_DICT, frequencies)

    runTest(
        "frequency2",
        [search],
        createTestFile,
        EvaluationMechanismTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE,
    )
def googleAscendPatternSearchTest(createTestFile=False):
    """
    This pattern is looking for a short ascend in the Google peak prices.
    PATTERN SEQ(GoogleStockPriceUpdate a, GoogleStockPriceUpdate b, GoogleStockPriceUpdate c)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
    WITHIN 3 minutes
    """
    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = SeqOperator(
        [QItem("GOOG", "a"), QItem("GOOG", "b"), QItem("GOOG", "c")])

    first_rise = SmallerThanFormula(peak("a"), peak("b"))
    second_rise = SmallerThanFormula(peak("b"), peak("c"))
    condition = AndFormula(first_rise, second_rise)

    pattern = Pattern(structure, condition, timedelta(minutes=3))
    runTest('googleAscend', [pattern], createTestFile)
def frequencyTailoredPatternSearchTest(createTestFile=False):
    """
    PATTERN SEQ(DRIV a, MSFT b, CBRL c)
    WHERE a.OpeningPrice > b.OpeningPrice AND b.OpeningPrice > c.OpeningPrice
    WITHIN timedelta.max

    FREQUENCY_DICT statistics are attached to the pattern; the test runs with
    SORT_BY_FREQUENCY_LEFT_DEEP_TREE over nasdaqEventStream.
    """
    def opening(name):
        # Fresh term reading "Opening Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Opening Price"])

    structure = SeqOperator(
        [QItem("DRIV", "a"), QItem("MSFT", "b"), QItem("CBRL", "c")])
    a_above_b = GreaterThanFormula(opening("a"), opening("b"))
    b_above_c = GreaterThanFormula(opening("b"), opening("c"))
    search = Pattern(
        structure, AndFormula(a_above_b, b_above_c), timedelta.max)

    search.set_statistics(
        StatisticsTypes.FREQUENCY_DICT,
        {"MSFT": 256, "DRIV": 257, "CBRL": 1})

    mechanism = EvaluationMechanismTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE
    runTest(
        'frequencyTailored1',
        [search],
        createTestFile,
        eval_mechanism_type=mechanism,
        events=nasdaqEventStream,
    )
def zStreamPatternSearchTest(createTestFile=False):
    """
    PATTERN SEQ(MSFT a, DRIV b, ORLY c, CBRL d)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
      AND c.PeakPrice < d.PeakPrice
    WITHIN 3 minutes

    SELECTIVITY_MATRIX_AND_ARRIVAL_RATES statistics are attached as a
    (matrix, rates) tuple; the test runs with ZSTREAM_BUSHY_TREE over
    nasdaqEventStream.
    """
    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = SeqOperator([
        QItem("MSFT", "a"),
        QItem("DRIV", "b"),
        QItem("ORLY", "c"),
        QItem("CBRL", "d"),
    ])

    # Nesting mirrors the formula tree: ((a < b AND b < c) AND c < d).
    leading_pair = AndFormula(
        SmallerThanFormula(peak("a"), peak("b")),
        SmallerThanFormula(peak("b"), peak("c")))
    condition = AndFormula(
        leading_pair,
        SmallerThanFormula(peak("c"), peak("d")))

    search = Pattern(structure, condition, timedelta(minutes=3))

    # Off-diagonal entries of the 4x4 matrix, named by their (row, column)
    # indices; each value appears at both (i, j) and (j, i).
    s01 = 0.9457796098355941
    s12 = 0.15989723367389616
    s23 = 0.9992557393942864
    matrix = [
        [1.0, s01, 1.0, 1.0],
        [s01, 1.0, s12, 1.0],
        [1.0, s12, 1.0, s23],
        [1.0, 1.0, s23, 1.0],
    ]
    rates = [
        0.016597077244258872,
        0.01454418928322895,
        0.013917884481558803,
        0.012421711899791231,
    ]
    search.set_statistics(
        StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES,
        (matrix, rates))

    runTest(
        'zstream1',
        [search],
        createTestFile,
        eval_mechanism_type=EvaluationMechanismTypes.ZSTREAM_BUSHY_TREE,
        events=nasdaqEventStream,
    )
def sortedStorageBenchMarkTest(createTestFile=False):
    """
    PATTERN AND(DRIV a, MSFT b, CBRL c, MSFT m)
    WHERE b.LowestPrice >= a.LowestPrice AND b.PeakPrice >= c.PeakPrice
      AND b.LowestPrice >= m.LowestPrice
    WITHIN timedelta.max

    The same pattern is benchmarked twice via runBenchMark: first without
    storage parameters, then with TreeStorageParameters. `createTestFile`
    is accepted but not used by this function.
    """
    def lowest(name):
        # Fresh term reading "Lowest Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Lowest Price"])

    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = AndOperator([
        QItem("DRIV", "a"),
        QItem("MSFT", "b"),
        QItem("CBRL", "c"),
        QItem("MSFT", "m"),
    ])

    b_low_ge_a_low = GreaterThanEqFormula(lowest("b"), lowest("a"))
    b_peak_ge_c_peak = GreaterThanEqFormula(peak("b"), peak("c"))
    b_low_ge_m_low = GreaterThanEqFormula(lowest("b"), lowest("m"))
    condition = AndFormula(
        b_low_ge_a_low,
        AndFormula(b_peak_ge_c_peak, b_low_ge_m_low))

    benchmarked = Pattern(structure, condition, timedelta.max)

    # First run: no storage parameters supplied.
    runBenchMark("sortedStorageBenchMark - default storage", [benchmarked])

    # Second run: storage parameters built only after the first run finished.
    per_item_values = {"a": 122, "b": 139, "c": 104, "m": 139}
    sorted_params = TreeStorageParameters(True, per_item_values)
    runBenchMark(
        "sortedStorageBenchMark - sorted storage",
        [benchmarked],
        storage_params=sorted_params)
def simplePatternSearchTest(createTestFile=False):
    """
    PATTERN SEQ(AppleStockPriceUpdate a, AmazonStockPriceUpdate b, AvidStockPriceUpdate c)
    WHERE a.OpeningPrice > b.OpeningPrice AND b.OpeningPrice > c.OpeningPrice
    WITHIN 5 minutes
    """
    def opening(name):
        # Fresh term reading "Opening Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Opening Price"])

    structure = SeqOperator(
        [QItem("AAPL", "a"), QItem("AMZN", "b"), QItem("AVID", "c")])

    apple_above_amazon = GreaterThanFormula(opening("a"), opening("b"))
    amazon_above_avid = GreaterThanFormula(opening("b"), opening("c"))
    condition = AndFormula(apple_above_amazon, amazon_above_avid)

    search = Pattern(structure, condition, timedelta(minutes=5))
    runTest("simple", [search], createTestFile)
def sortedStorageTest(createTestFile=False):
    """
    PATTERN AND(DRIV a, MSFT b, CBRL c)
    WHERE a.OpeningPrice > b.OpeningPrice AND b.OpeningPrice > c.OpeningPrice
    WITHIN timedelta.max

    Runs with TRIVIAL_LEFT_DEEP_TREE over nasdaqEventStream.
    """
    def opening(name):
        # Fresh term reading "Opening Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Opening Price"])

    structure = AndOperator(
        [QItem("DRIV", "a"), QItem("MSFT", "b"), QItem("CBRL", "c")])
    a_above_b = GreaterThanFormula(opening("a"), opening("b"))
    b_above_c = GreaterThanFormula(opening("b"), opening("c"))

    search = Pattern(
        structure, AndFormula(a_above_b, b_above_c), timedelta.max)

    mechanism = EvaluationMechanismTypes.TRIVIAL_LEFT_DEEP_TREE
    runTest("sortedStorageTest", [search], createTestFile,
            eval_mechanism_type=mechanism, events=nasdaqEventStream)
def nonsensePatternSearchTest(createTestFile=False):
    """
    This pattern is looking for something that does not make sense.
    PATTERN AND(AmazonStockPriceUpdate a, AvidStockPriceUpdate b, AppleStockPriceUpdate c)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice AND c.PeakPrice < a.PeakPrice
    WITHIN 1 minute
    """
    def peak(name):
        # Fresh term reading "Peak Price" from the event bound to `name`.
        return IdentifierTerm(name, lambda event: event["Peak Price"])

    structure = AndOperator(
        [QItem("AMZN", "a"), QItem("AVID", "b"), QItem("AAPL", "c")])

    # The three comparisons form a cycle: a < b, b < c, c < a.
    a_below_b = SmallerThanFormula(peak("a"), peak("b"))
    b_below_c = SmallerThanFormula(peak("b"), peak("c"))
    c_below_a = SmallerThanFormula(peak("c"), peak("a"))
    condition = AndFormula(a_below_b, AndFormula(b_below_c, c_below_a))

    pattern = Pattern(structure, condition, timedelta(minutes=1))
    runTest('nonsense', [pattern], createTestFile)
def oneArgumentsearchTest(createTestFile=False):
    """
    PATTERN SEQ(AAPL a)
    WHERE a.OpeningPrice > 135
    WITHIN timedelta.max
    """
    structure = SeqOperator([QItem("AAPL", "a")])
    opening_of_a = IdentifierTerm("a", lambda event: event["Opening Price"])
    condition = GreaterThanFormula(opening_of_a, AtomicTerm(135))

    search = Pattern(structure, condition, timedelta.max)
    runTest("one", [search], createTestFile)