Beispiel #1
0
def fun1():
    """Loop without end, alternating between ``ip_get.run()`` and ``query.run()``.

    ``ip_get.run()`` is called on iteration 0 and on every further multiple
    of 100; every other iteration calls ``query.run()``. The loop condition
    compares an ever-growing integer against infinity, so this function
    never returns on its own.
    """
    counter = 0
    unbounded = float('inf')
    while counter < unbounded:
        # A non-zero remainder means "not a multiple of 100"; 0 itself has
        # remainder 0 and therefore takes the ip_get branch as well.
        if counter % 100:
            query.run()
        else:
            ip_get.run()
        counter += 1
Beispiel #2
0
def main():
    """Dispatch the sub-command given on the command line.

    ``sys.argv[1]`` must be ``load``, ``query`` or ``clear`` and at most one
    extra argument may follow. ``load loopdata`` calls
    ``loader.load_loopdata``; any other ``load`` invocation calls
    ``loader.load``. ``query`` calls ``query.run`` and ``clear`` calls
    ``loader.clear``. Every handler receives the client returned by
    ``connect()``.

    On invalid usage an error plus usage text is printed and the process
    exits with status 1.
    """
    argv = sys.argv
    commands = ('load', 'query', 'clear')
    if len(argv) not in (2, 3) or argv[1] not in commands:
        # The usage line now lists every accepted command. A single-argument
        # print(...) call emits the same text on Python 2 and Python 3.
        print('Error: invalid usage\n'
              'Usage: python main.py load [loopdata]|query|clear')
        sys.exit(1)

    client = connect()
    command = argv[1]
    # The commands are mutually exclusive, hence one if/elif chain.
    if command == 'load':
        if len(argv) == 3 and argv[2] == "loopdata":
            loader.load_loopdata(client)
        else:
            loader.load(client)
    elif command == 'query':
        query.run(client)
    else:
        # Only 'clear' is left after the validation above.
        loader.clear(client)
  # since we don't currently have any way of passing args into mappers

  args = parser.parse_args()  
  PAST_CHANGE = args.past_change * 10**-4
  if args.future_change:
    FUTURE_CHANGE = args.future_change * 10 ** -4
  else:
    FUTURE_CHANGE = PAST_CHANGE 
  OFFSET_TICKS = args.time_offset * 10

  START_HOUR = args.start_hour 
  END_HOUR = args.end_hour 
  
  assert START_HOUR < END_HOUR
  
  print "past change %", PAST_CHANGE
  print "future change %", FUTURE_CHANGE 
  print "time offset in ticks", OFFSET_TICKS
  
  all_ccys =  query.run(args.pattern, 
    map_hdf = mapper, 
   init = {}, combine = combine)
  # Once all the counts have been collected, do the simple task of 
  # computing probs by dividing event counts by the totals
  result = {}
  for (ccy,  (event_count, total)) in all_ccys.items():
    if total == 0: prob = 0
    else: prob = event_count / float(total)
    result[ccy]  = {'prob':prob, 'count':event_count, 'total':total}
  print result
Beispiel #4
0
def combine(all_amts, ccy_amts):
    """Fold one ``(ccy, amts)`` result into the per-currency accumulator.

    Args:
        all_amts: dict mapping a currency key to a list; mutated in place.
        ccy_amts: two-item sequence ``(ccy, amts)`` where ``amts`` is an
            iterable of values to append to the list stored under ``ccy``.

    The pair is unpacked in the body because tuple parameters in a function
    signature are Python 2-only syntax and a SyntaxError on Python 3.
    """
    ccy, amts = ccy_amts
    all_amts.setdefault(ccy, []).extend(amts)


from argparse import ArgumentParser
# Command-line interface: one positional key pattern plus an optional
# minimum cross duration (milliseconds, per the help text).
parser = ArgumentParser(description='Process some integers.')
parser.add_argument('pattern',
                    metavar='P',
                    type=str,
                    help='s3://capk-bucket/some-hdf-pattern')
parser.add_argument(
    '--min-duration',
    dest='min_dur',
    type=int,
    default=None,
    help=
    'ignore crosses which last shorter than this min. duration in milliseconds'
)

if __name__ == '__main__':
    args = parser.parse_args()
    # Reject an empty pattern through argparse (usage message, exit status 2)
    # instead of ``assert``, which is stripped when Python runs with -O.
    if not args.pattern:
        parser.error('pattern must not be empty')
    # Module-level setting; presumably consulted by the cross_amounts mapper
    # passed below -- confirm against its definition.
    MIN_DUR = args.min_dur
    df = query.run(args.pattern,
                   map_hdf=cross_amounts,
                   init={},
                   combine=combine,
                   post_process=query_helpers.summarize_continuous)
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print(df)
Beispiel #5
0
import query

# Call query.run() with no arguments and keep whatever it returns.
r = query.run()
# The returned object is expected to expose full_query(); what that method
# does is defined by the query module and is not visible here.
r.full_query()
Beispiel #6
0
from optparse import OptionParser
import query 
import cross_info
import query_helpers

def cross_durations(hdf):
  """Return ``(ccy, durations)`` for one HDF.

  ``ccy`` is read from ``hdf.attrs['ccy']``; ``durations`` is a list holding
  the ``dur`` attribute of every item yielded by
  ``cross_info.find_crossed_markets_in_hdf(hdf)``.
  """
  currency = hdf.attrs['ccy']
  durations = []
  for crossed in cross_info.find_crossed_markets_in_hdf(hdf):
    durations.append(crossed.dur)
  return currency, durations
    
def combine(all_durs, ccy_durs):
  """Fold one ``(ccy, durs)`` result into the per-currency accumulator.

  Args:
    all_durs: dict mapping a currency key to a list; mutated in place.
    ccy_durs: two-item sequence ``(ccy, durs)`` where ``durs`` is an
      iterable of values to append to the list stored under ``ccy``.

  Fixes the ``setsdefault`` typo, which raised AttributeError on every call.
  The pair is unpacked in the body because tuple parameters in a signature
  are Python 2-only syntax.
  """
  ccy, durs = ccy_durs
  all_durs.setdefault(ccy, []).extend(durs)
  
# Command-line interface: exactly one positional key pattern, no options.
parser = OptionParser(usage = "usage: %prog s3://bucket-name/key-pattern")
if __name__ == '__main__':
  (options, args) = parser.parse_args()
  # Report a wrong argument count through optparse (usage text, exit
  # status 2) instead of ``assert``, which is stripped under -O and only
  # yields a bare AssertionError traceback.
  if len(args) != 1:
    parser.error("expected exactly one s3://bucket-name/key-pattern argument")
  df = query.run(args[0], map_hdf = cross_durations,
   init = {}, combine = combine,
   post_process = query_helpers.summarize_continuous)
  # A single-argument print(...) behaves the same on Python 2 and 3.
  print(df)

def combine(all_amts, ccy_amts):
    """Append one result's values to the list kept for its currency.

    Args:
        all_amts: dict of currency key -> list; updated in place.
        ccy_amts: two-item sequence ``(ccy, amts)``; every item of ``amts``
            is appended to ``all_amts[ccy]``, creating the list if needed.

    Unpacking happens in the body because a tuple parameter in the signature
    is valid only on Python 2.
    """
    ccy, amts = ccy_amts
    all_amts.setdefault(ccy, []).extend(amts)


from argparse import ArgumentParser

# Command-line interface: positional key pattern, optional minimum cross
# duration (milliseconds, per the help text) and a ``--future`` integer
# (default 10000).
parser = ArgumentParser(description="Process some integers.")
parser.add_argument("pattern", metavar="P", type=str, help="s3://capk-bucket/some-hdf-pattern")
parser.add_argument(
    "--min-duration",
    dest="min_dur",
    type=int,
    default=None,
    help="ignore crosses which last shorter than this min. duration in milliseconds",
)
parser.add_argument("--future", dest="future", type=int, default=10000)

if __name__ == "__main__":
    args = parser.parse_args()
    # %-formatting inside a single-argument print(...) yields the same text
    # on Python 2 and 3 as the former ``print "Args", args`` statement.
    print("Args %s" % (args,))
    # Reject an empty pattern through argparse (usage message, exit status 2)
    # instead of ``assert``, which is stripped when Python runs with -O.
    if not args.pattern:
        parser.error("pattern must not be empty")
    # Module-level settings; presumably consulted by the does_cross_return
    # mapper passed below -- confirm against its definition.
    MIN_DUR = args.min_dur
    FUTURE_HORIZON = args.future
    df = query.run(
        args.pattern, map_hdf=does_cross_return, init={}, combine=combine, post_process=query_helpers.summarize_bool
    )
    print(df)
import query 
import numpy as np 

def map(hdf):
  """Return ``(min, max)`` of the midprice computed from one HDF.

  The midprice is the element-wise mean of the ``'bid'`` and ``'offer'``
  entries of *hdf*.
  """
  # Slicing each column with [:] pulls it out as an array before the
  # arithmetic (per the original author's note, a numpy array).
  bids = hdf['bid'][:]
  offers = hdf['offer'][:]
  mids = (bids + offers) / 2.0
  lowest = np.min(mids)
  highest = np.max(mids)
  return lowest, highest

def combine(values, min_max):
  """Append one ``(min, max)`` pair to the flat accumulator list.

  Args:
    values: list collecting every minimum and maximum seen so far; mutated
      in place.
    min_max: two-item sequence ``(curr_min, curr_max)``.

  The pair is unpacked in the body because tuple parameters in a signature
  are Python 2-only syntax.
  """
  curr_min, curr_max = min_max
  values.extend((curr_min, curr_max))


from argparse import ArgumentParser 
# Command-line interface: required currency pair and optional bucket name.
parser = ArgumentParser(description='min/max for ccy pair')
parser.add_argument('--ccy', dest='ccy', type=str, required=True, help="e.g. USDJPY")
parser.add_argument('--bucket', dest='bucket', type=str, default='capk-fxcm-hdf')
if __name__ == '__main__':
  args = parser.parse_args()
  # %-formatting inside a single-argument print(...) yields the same text on
  # Python 2 and 3 as the former ``print "args", args`` statement.
  print("args %s" % (args,))
  # Match any key that contains the currency pair and ends in .hdf.
  pattern = '*' + args.ccy + '*.hdf'
  values = query.run(args.bucket, pattern,
    map_hdf = map, combine = combine, init = [])
  # np.min / np.max raise ValueError on an empty sequence, so stop with a
  # readable message when nothing was collected (assumes query.run returns
  # the accumulated ``init`` list -- confirm against the query module).
  if len(values) == 0:
    parser.exit(1, "No values collected for pattern %s in bucket %s\n"
                % (pattern, args.bucket))
  print("Min: %s, Max: %s" % (np.min(values), np.max(values)))

def count_crosses(hdf):
    """Count the ticks in one HDF whose bid exceeds the offer.

    Reads ``ccy``, ``year``, ``month`` and ``day`` from ``hdf.attrs`` and the
    ``'bid'`` / ``'offer'`` columns from *hdf* itself, prints a one-line
    summary, and returns ``(ccy, count)``.
    """
    ccy = hdf.attrs['ccy']
    date = (hdf.attrs['year'], hdf.attrs['month'], hdf.attrs['day'])
    # Element-wise comparison gives a boolean array; summing it counts the
    # True entries.
    count = np.sum(hdf['bid'][:] > hdf['offer'][:])
    # %-formatting inside a single-argument print(...) produces the same
    # line on Python 2 and 3; the multi-argument print statement it replaces
    # is a SyntaxError on Python 3.
    print("Found %s crossed tick for %s on %s" % (count, ccy, date))
    return ccy, count


def combine(counts, ccy_count):
    """Add one ``(ccy, count)`` result to the running per-currency totals.

    Args:
        counts: dict mapping a currency key to its running total; mutated in
            place. A missing key starts from 0.
        ccy_count: two-item sequence ``(ccy, count)``.

    The pair is unpacked in the body because tuple parameters in a signature
    are Python 2-only syntax.
    """
    ccy, count = ccy_count
    counts[ccy] = counts.get(ccy, 0) + count


def convert_to_dataframe(total_counts):
    """Build a DataFrame with a single ``count`` column from *total_counts*.

    The dict's values become the column and its keys become the index.
    """
    column = {"count": total_counts.values()}
    labels = total_counts.keys()
    return pandas.DataFrame(column, index=labels)


# Command-line interface: exactly one positional key pattern, no options.
parser = OptionParser(usage="usage: %prog s3://bucket-name/key-pattern")
if __name__ == '__main__':
    (options, args) = parser.parse_args()
    # Report a wrong argument count through optparse (usage text, exit
    # status 2) instead of ``assert``, which is stripped under -O and only
    # yields a bare AssertionError traceback.
    if len(args) != 1:
        parser.error(
            "expected exactly one s3://bucket-name/key-pattern argument")
    df = query.run(args[0],
                   map_hdf=count_crosses,
                   init={},
                   combine=combine,
                   post_process=convert_to_dataframe)
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print(df)
Beispiel #10
0
# Minimum cross duration to keep; None disables the filter.
MIN_DUR = None

def cross_amounts(hdf):
  """Return ``(ccy, amounts)`` for one HDF.

  ``ccy`` is read from ``hdf.attrs['ccy']``. For every item yielded by
  ``cross_info.find_crossed_markets_in_hdf(hdf)`` the product
  ``amt * min_vol`` is collected, skipping items whose ``dur`` is below the
  module-level ``MIN_DUR`` when that is set.
  """
  currency = hdf.attrs['ccy']
  amounts = []
  for crossed in cross_info.find_crossed_markets_in_hdf(hdf):
    if MIN_DUR is None or crossed.dur >= MIN_DUR:
      amounts.append(crossed.amt * crossed.min_vol)
  return currency, amounts
    
def combine(all_amts, ccy_amts):
  """Fold one ``(ccy, amts)`` result into the per-currency accumulator.

  Args:
    all_amts: dict mapping a currency key to a list; mutated in place.
    ccy_amts: two-item sequence ``(ccy, amts)`` where ``amts`` is an
      iterable of values to append to the list stored under ``ccy``.

  The pair is unpacked in the body because tuple parameters in a signature
  are Python 2-only syntax.
  """
  ccy, amts = ccy_amts
  all_amts.setdefault(ccy, []).extend(amts)


from argparse import ArgumentParser 
# Command-line interface: one positional key pattern plus an optional
# minimum cross duration (milliseconds, per the help text).
parser = ArgumentParser(description='Process some integers.')
parser.add_argument('pattern', metavar='P', type=str,
                       help='s3://capk-bucket/some-hdf-pattern')
parser.add_argument('--min-duration', dest='min_dur', type=int, default=None,
  help  = 'ignore crosses which last shorter than this min. duration in milliseconds')

if __name__ == '__main__':
  args = parser.parse_args()
  # Reject an empty pattern through argparse (usage message, exit status 2)
  # instead of ``assert``, which is stripped when Python runs with -O.
  if not args.pattern:
    parser.error('pattern must not be empty')
  # Module-level threshold; the cross_amounts mapper passed below is
  # expected to read it when filtering crosses by duration.
  MIN_DUR = args.min_dur
  df = query.run(args.pattern,
    map_hdf = cross_amounts,
   init = {}, combine = combine,
   post_process = query_helpers.summarize_continuous)
  # A single-argument print(...) behaves the same on Python 2 and 3.
  print(df)
from optparse import OptionParser
import numpy as np
import query
import pandas


def count_crosses(hdf):
    """Count the ticks in one HDF whose bid exceeds the offer.

    Reads ``ccy``, ``year``, ``month`` and ``day`` from ``hdf.attrs`` and the
    ``"bid"`` / ``"offer"`` columns from *hdf* itself, prints a one-line
    summary, and returns ``(ccy, count)``.
    """
    ccy = hdf.attrs["ccy"]
    date = (hdf.attrs["year"], hdf.attrs["month"], hdf.attrs["day"])
    # Element-wise comparison gives a boolean array; summing it counts the
    # True entries.
    count = np.sum(hdf["bid"][:] > hdf["offer"][:])
    # %-formatting inside a single-argument print(...) produces the same
    # line on Python 2 and 3; the multi-argument print statement it replaces
    # is a SyntaxError on Python 3.
    print("Found %s crossed tick for %s on %s" % (count, ccy, date))
    return ccy, count


def combine(counts, ccy_count):
    """Accumulate one ``(ccy, count)`` result into the per-currency totals.

    Args:
        counts: dict of currency key -> running total; updated in place.
            Keys not seen before start from 0.
        ccy_count: two-item sequence ``(ccy, count)``.

    Unpacking happens in the body because a tuple parameter in the signature
    is valid only on Python 2.
    """
    ccy, count = ccy_count
    counts[ccy] = counts.get(ccy, 0) + count


def convert_to_dataframe(total_counts):
    """Turn a ``{key: count}`` dict into a one-column ``count`` DataFrame.

    The dict's keys are used as the index and its values as the column.
    """
    frame_data = {"count": total_counts.values()}
    row_labels = total_counts.keys()
    return pandas.DataFrame(frame_data, index=row_labels)


# Command-line interface: exactly one positional key pattern, no options.
parser = OptionParser(usage="usage: %prog s3://bucket-name/key-pattern")
if __name__ == "__main__":
    (options, args) = parser.parse_args()
    # Report a wrong argument count through optparse (usage text, exit
    # status 2) instead of ``assert``, which is stripped under -O and only
    # yields a bare AssertionError traceback.
    if len(args) != 1:
        parser.error("expected exactly one s3://bucket-name/key-pattern argument")
    df = query.run(args[0], map_hdf=count_crosses, init={}, combine=combine, post_process=convert_to_dataframe)
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print(df)
Beispiel #12
0
        # either the cross was shorter than 50ms or both prices changed 
        # at the end 
        returns =  np.sum(future_bid_ok & future_offer_ok & future_vol_ok)
      print cross, "returns", returns, "times in", FUTURE_HORIZON / 1000.0, "seconds"
      result.append(returns > 0)
  return ccy, result
   
def combine(all_amts, ccy_amts):
  """Append one result's values to the list kept for its currency.

  Args:
    all_amts: dict of currency key -> list; updated in place.
    ccy_amts: two-item sequence ``(ccy, amts)``; every item of ``amts`` is
      appended to ``all_amts[ccy]``, creating the list if needed.

  Unpacking happens in the body because a tuple parameter in the signature
  is valid only on Python 2.
  """
  ccy, amts = ccy_amts
  all_amts.setdefault(ccy, []).extend(amts)


from argparse import ArgumentParser 
# Command-line interface: positional key pattern, optional minimum cross
# duration (milliseconds, per the help text) and a ``--future`` integer
# (default 10000).
parser = ArgumentParser(description='Process some integers.')
parser.add_argument('pattern', metavar='P', type=str,
                       help='s3://capk-bucket/some-hdf-pattern')
parser.add_argument('--min-duration', dest='min_dur', type=int, default=None,
  help  = 'ignore crosses which last shorter than this min. duration in milliseconds')
parser.add_argument('--future', dest = 'future', type=int, default=10000)

if __name__ == '__main__':
  args = parser.parse_args()
  # %-formatting inside a single-argument print(...) yields the same text on
  # Python 2 and 3 as the former ``print "Args", args`` statement.
  print("Args %s" % (args,))
  # Reject an empty pattern through argparse (usage message, exit status 2)
  # instead of ``assert``, which is stripped when Python runs with -O.
  if not args.pattern:
    parser.error('pattern must not be empty')
  # Module-level settings; presumably consulted by the does_cross_return
  # mapper passed below -- confirm against its definition.
  MIN_DUR = args.min_dur
  FUTURE_HORIZON = args.future
  df = query.run(args.pattern,
    map_hdf = does_cross_return,
   init = {}, combine = combine,
   post_process = query_helpers.summarize_bool)
  print(df)
Beispiel #13
0
    midprice = (hdf['bid'][:] + hdf['offer'][:]) / 2.0
    return np.min(midprice), np.max(midprice)


def combine(values, min_max):
    """Append one ``(min, max)`` pair to the flat accumulator list.

    Args:
        values: list collecting every minimum and maximum seen so far;
            mutated in place.
        min_max: two-item sequence ``(curr_min, curr_max)``.

    The pair is unpacked in the body because tuple parameters in a signature
    are Python 2-only syntax.
    """
    curr_min, curr_max = min_max
    values.extend((curr_min, curr_max))


from argparse import ArgumentParser
# Command-line interface: required currency pair and optional bucket name.
parser = ArgumentParser(description='min/max for ccy pair')
parser.add_argument('--ccy',
                    dest='ccy',
                    type=str,
                    required=True,
                    help="e.g. USDJPY")
parser.add_argument('--bucket',
                    dest='bucket',
                    type=str,
                    default='capk-fxcm-hdf')
if __name__ == '__main__':
    args = parser.parse_args()
    # %-formatting inside a single-argument print(...) yields the same text
    # on Python 2 and 3 as the former ``print "args", args`` statement.
    print("args %s" % (args,))
    # Match any key that contains the currency pair and ends in .hdf.
    pattern = '*' + args.ccy + '*.hdf'
    values = query.run(args.bucket,
                       pattern,
                       map_hdf=map,
                       combine=combine,
                       init=[])
    # np.min / np.max raise ValueError on an empty sequence, so stop with a
    # readable message when nothing was collected (assumes query.run returns
    # the accumulated ``init`` list -- confirm against the query module).
    if len(values) == 0:
        parser.exit(1, "No values collected for pattern %s in bucket %s\n"
                    % (pattern, args.bucket))
    print("Min: %s, Max: %s" % (np.min(values), np.max(values)))
Beispiel #14
0
    args = parser.parse_args()
    PAST_CHANGE = args.past_change * 10**-4
    if args.future_change:
        FUTURE_CHANGE = args.future_change * 10**-4
    else:
        FUTURE_CHANGE = PAST_CHANGE
    OFFSET_TICKS = args.time_offset * 10

    START_HOUR = args.start_hour
    END_HOUR = args.end_hour

    assert START_HOUR < END_HOUR

    print "past change %", PAST_CHANGE
    print "future change %", FUTURE_CHANGE
    print "time offset in ticks", OFFSET_TICKS

    all_ccys = query.run(args.pattern,
                         map_hdf=mapper,
                         init={},
                         combine=combine)
    # Once all the counts have been collected, do the simple task of
    # computing probs by dividing event counts by the totals
    result = {}
    for (ccy, (event_count, total)) in all_ccys.items():
        if total == 0: prob = 0
        else: prob = event_count / float(total)
        result[ccy] = {'prob': prob, 'count': event_count, 'total': total}
    print result