def start(self, doc):
    """Begin ARFF output by copying the 'arff-header' template into self.out.

    Missing template is not fatal: a warning is logged and output continues
    without the header.
    """
    header_path = __main__.prescript_dir + 'arff-header'
    try:
        # 'r' instead of the original 'r+': the header is only read, never
        # written, and 'with' closes the handle even if writelines() fails.
        with open(header_path, 'r') as ahdr:
            self.out.writelines(ahdr.readlines())
    except IOError:
        msg('WARNING: Header %s does not exist' % header_path)
def buy(self, symbol, shares, price, date):
    """Record a purchase: charge commission plus cost, grow the position."""
    #FIXME: output to log file
    msg('%s BUY %s %s@%s' % (date, symbol, shares, price), ind=1)
    held, _, _ = self.account.positions[symbol]
    # commission and share cost are charged separately
    self.account.cash -= cfg.commission
    self.account.cash -= shares * price
    # position tuple is (shares held, last trade price, last trade date)
    self.account.positions[symbol] = (held + shares, price, date)
    self.account.update(date)
def update_symbol(self, symbol):
    """(Re)load quote, split and dividend CSV files for `symbol` into the db.

    Each table is created on demand and rows are added with
    'insert or ignore' on a unique Date column, so re-running is idempotent.
    """
    msg('updating data: %s' % symbol)
    #FIXME: auto download csv and only add new data
    # - find most recent weekday date / most recent date in csv
    # - append only the missing rows, recompute missing indicators
    # For now the whole table is simply re-filled from the csv files.

    # daily quotes
    self._load_csv_table(
        ('create table if not exists %s (' % symbol) +
        'id integer primary key autoincrement,'
        'Date text unique not null,'
        'Open real not null,'
        'High real not null,'
        'Low real not null,'
        'Close real not null,'
        'AdjClose real not null,'
        'Volume real not null'
        ')',
        ('insert or ignore into %s ' % symbol +
         '(Date, Open, High, Low, Close, AdjClose, Volume)'
         'values (?,?,?,?,?,?,?)'),
        'data/%s.csv' % symbol)

    # splits
    self._load_csv_table(
        ('create table if not exists %s_splits (' % symbol) +
        'id integer primary key autoincrement,'
        'Date text unique not null,'
        'Split text not null'
        ')',
        ('insert or ignore into %s_splits ' % symbol +
         '(Date, Split)'
         'values (?,?)'),
        'data/%s_splits.csv' % symbol)

    # dividends
    self._load_csv_table(
        ('create table if not exists %s_dividends (' % symbol) +
        'id integer primary key autoincrement,'
        'Date text unique not null,'
        'Dividend real not null'
        ')',
        ('insert or ignore into %s_dividends ' % symbol +
         '(Date, Dividend)'
         'values (?,?)'),
        'data/%s_dividends.csv' % symbol)

def _load_csv_table(self, create_sql, insert_sql, path):
    """Create the table if needed and insert every data row of the csv.

    The original code leaked the open file handles; this closes them.
    """
    self.cur.execute(create_sql)
    f = open(path, 'rb')  # 'rb' per the Python 2 csv module convention
    try:
        reader = csv.reader(f)
        next(reader)  # skip header row
        for row in reader:
            self.cur.execute(insert_sql, tuple(row))
    finally:
        f.close()
def __init__(self, name='testaccount'):
    """Create a paper-trading account funded with cfg.start_cash."""
    self.name = name
    self.cash = cfg.start_cash
    # net worth starts as pure cash; buying power is risk-limited cash
    self.networth = self.cash
    self.buypower = self.cash * cfg.risk
    # positions: symbol -> (shares, last trade price, last trade date)
    self.positions = self.init_positions()
    # trades: symbol -> [(date_bought, date_sold,
    #                     price_bought, price_sold, shares), ...]
    self.trades = self.init_trades()
    msg('using account: %s' % self.name, '+')
def sell(self, symbol, shares, price, date):
    """Record a sale: credit proceeds minus commission, log the round trip."""
    #FIXME: output to log file
    msg('%s SELL %s %s@%s' % (date, symbol, shares, price), ind=1)
    held, bought_price, bought_date = self.account.positions[symbol]
    self.account.cash -= cfg.commission
    self.account.cash += shares * price
    self.account.positions[symbol] = (held - shares, price, date)
    # remember the completed round trip for later statistics
    self.account.trades[symbol].append(
        (bought_date, date, bought_price, price, shares))
    self.account.update(date)
def info(self, date): #DEBUG for symbol in self.trades: print '\n', symbol, 'trades' for trade in self.trades[symbol]: print ' ', trade self.update(date) stats = self.calc_stats() msg('account info for \'%s\'' % self.name) for stat in self.stat_listing: msg('%20s: %0.2f' % (stat, stats[stat]), ind=1)
def assembleDocument(frags):
    """Feed every fragment through the line/page/doc assembler chain.

    Returns the finished Document from the DocAssembler.
    """
    msg("Assembling document")
    doc_asm = DocAssembler()
    page_asm = PageAssembler(doc_asm)
    line_asm = LineAssembler(frags.modeCharWidth(), page_asm)
    for frag in frags.getFragData():
        line_asm.submit(frag)
    line_asm.done()  # flush any pending line into the page/document
    return doc_asm.getDocument()
def backtest(weights):
    """Replay the strategy daily from cfg.test_begin through cfg.test_end.

    For each trading day every ticker is analyzed, the best evaluations are
    turned into buy/flatten orders, and the account is marked to market.
    Any open positions are flattened on the final day.
    """
    #iterate daily through weekdays
    for day in rrule.rrule(rrule.DAILY, dtstart=cfg.test_begin,
                           until=cfg.test_end):
        #skip weekends, holidays (only for holidays 1995 onward)
        date = str(day.date())
        if day.weekday() > 4 or date in cfg.holidays:
            continue
        day_start = time.time()
        #DEBUG
        #if date != '2017-09-29':
        #    continue
        #analyze all symbols
        evals = {}
        for symbol in cfg.tickers:
            evals[symbol] = analyze.Analyze(symbol, date, weights)
            #print ' eval: ', symbol, evals[symbol].evaluation
        #choose best evaluations
        choices = analyze.best_eval(evals)
        for choice in choices:
            #create trade to buy/sell/pass number of shares at price
            symbol, price, shares = analyze.pick_trade(choice)
            if shares > 0:
                cfg.api.buy(symbol, shares, price, date)
            elif shares < 0:
                cfg.api.flatten(symbol, price, date)
        elapsed = round(time.time() - day_start, 3)
        msg('%s analyzed in %s (%s - %s)' % (date, elapsed,
            cfg.api.account_balance(), cfg.api.account_networth()))
        #update account at end of day
        cfg.api.update_account(date)
    #flatten any open positions at end
    end_date = str(cfg.test_end.date())
    cfg.api.close_all(end_date)
    # BUGFIX: the final update previously reused the stale loop variable
    # `date`, which precedes test_end when test_end falls on a weekend or
    # holiday (and is undefined if the loop never ran); update on the same
    # date the positions were closed.
    cfg.api.update_account(end_date)
def __init__(self, name):
    """Open (or create) the sqlite database 'data/<name>' and load all tickers.

    Exits the process if the database cannot be opened.
    """
    self.name = 'data/%s' % name
    try:
        self.con = sql.connect(self.name)
        self.cur = self.con.cursor()
        #print self.query('select sqlite_version()')
        msg('connected to db: %s' % self.name, '+')
        #make sure db has correct data / correct tables
    # BUGFIX: was a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit; catch ordinary errors only.
    except Exception:
        #error and exit for now
        msg('could not connect to database: %s' % self.name, '-')
        sys.exit(1)
    #add or update specified symbols
    for symbol in cfg.tickers:
        self.update_symbol(symbol)
def preprocess(self): self.checkPolarity() # calculate document wide mode values self.MOLS=statfunctions.Mode(self.all_ls) self.MOLEN=statfunctions.Mode(self.all_len) msg( '\tMOLS='+`self.MOLS` ) msg( '\tMOLEN='+`self.MOLEN` ) self.calcMargins() lastPageLastLine = None for i in range(len(self.pages)): self.pages[i].calcValues(self.MOLS,self.MOLEN,self.modertmargins, lastPageLastLine) if self.pages[i][-1].type == Line.Plain or len(self.pages[i]) < 2: n = -1 else: n = -2 lastPageLastLine = self.pages[i].lines[n]
def checkParams(argv):
    """Validate command-line arguments and derive the filenames.

    argv: full argument vector, i.e. [prog, format, input [, output]].
    Returns (format, inputFilename, outFilename); exits on invalid input.
    """
    # check we have all commandline parameters
    if len(argv) < 3:
        msg( "Usage: prescript <plain|html|arff> <input> [output]" )
        sys.exit(1)
    inputFilename, format = argv[2],argv[1]
    # append the .ps if it was omitted
    if not inputFilename.endswith(".ps"):
        inputFilename += '.ps'
    # plain is a more convenient word to use
    if format == 'plain':
        format = 'txt'
    # check that it's a valid format
    if not format in io.knownFormats:
        msg( "Unknown format %s." % format )
        sys.exit(1)
    # make sure it exists
    if not os.path.exists( inputFilename ):
        msg( "Can't find (or access) file '%s'" % inputFilename )
        sys.exit(1)
    # BUGFIX: previously consulted sys.argv here, ignoring the argv
    # parameter the caller passed in; use argv consistently.
    if len(argv) == 4:
        outFilename = argv[3]
    else:
        outFilename = misc.MakeFilename(inputFilename, '.'+format)
    return format,inputFilename,outFilename
def main():
    """Optionally train the weights, then optionally backtest with them."""
    #general init done on config import
    weights = {}
    if cfg.train:
        #train weights
        msg('beginning to train', '+')
        weights = train()
        msg('done training', '+')
    if cfg.backtest:
        #backtest algorithm
        msg('beginning backtest', '+')
        begin = time.time()
        backtest(weights)
        msg('finished testing in %s' % round(time.time() - begin, 3), '+')
        cfg.api.account_info(cfg.today)
def readPostScriptDataFile(FH, worker):
    """Parse ghostscript fragment output from FH and feed it to worker.

    Lines are tab separated: a line starting with 'P' begins a new page; an
    'S' line with 8 fields carries one text fragment
    (tag, x0, y0, string, ytop, ybot, x1, y1).  Anything else counts as a
    bad line; after maxBadLines of those we abort.
    """
    errs = 0
    while 1:
        input = FH.readline()
        if not input:
            break  # EOF
        input = strip(input)
        if len(input) == 0:
            continue  # skip blank lines
        input = split(input, '\t')
        if input[0][0] == "P":
            worker.newPage()
        elif input[0][0] == "S" and len(input) == 8:
            [tag, x0, y0, string, ytop, ybot, x1, y1] = input
            # If x1 is 'S', then some funny recursive font stuff has happened.
            # Ignore the recursive stuff, and search for the rest of this line
            if x1 == "S":
                while 1:
                    input = FH.readline();
                    if input[0] != "S":
                        break
                # NOTE(review): an empty readline() (EOF) inside the loop
                # above would raise IndexError on input[0]; unguarded —
                # confirm ghostscript output can never end mid-fragment.
                # take x1,y1 from the first non-'S' continuation line
                [x1, y1] = split(input[:-1], '\t')[:2]
            string = unquote(string)
            if len(string) > 0:
                worker.textFragment(
                    Fragment( atoi(x0), atoi(y0), string,
                              atoi(x1), atoi(y1)))
        else:
            msg( "Bad fragment line: "+`input`)
            errs = errs + 1
            if errs == maxBadLines:
                msg( 'Error limit encounter, aborting. Is this *really* a post script file?' )
                sys.exit(1)
    worker.done()
def applyHandcheck(inputFilename, document):
    """Overlay hand-classified line labels from a '.handclass' file, if any.

    The file holds one numeric code per ARFF-classifiable line; codes map
    through hcstring to a class name stored on line.handclass.
    """
    # numeric code (as written in the handclass file) -> class name
    hcstring={'0':'linefeed','1':'paragraph','2':'pagebreaklinefeed','3':'pagebreakparagraph','4':'explicitlinefeed','5':'picnoise'}
    handcheckFN = misc.MakeFilename(inputFilename,'.handclass')
    if os.path.exists( handcheckFN ):
        msg( 'Found and applying handcheck file' )
        hcf = open( handcheckFN )
        for page in document:
            for line in page:
                try:
                    # only lines that appear in the ARFF output consume a code
                    if line.type in ARFFFormatter.ARFFtypes:
                        line.handclass = hcstring[string.strip(hcf.readline())]
                except KeyError, val:
                    # NOTE(review): on Python 2 `val` is the KeyError
                    # *instance*, so `val in [None,'']` is never true and the
                    # EOF branch looks unreachable (val.args[0] was probably
                    # meant; may date from older Python where the bound value
                    # was the key itself).  Confirm before relying on the
                    # EOF message below.
                    if val in [None,'']: # cope with EOF
                        msg( 'ERROR: Handclass file ran out before the end of the document!' )
                        return
                    # otherwise it's a bad keyword
                    msg( 'Unknown classification: '+`val` )
def readFragments(psFilename):
    """Run ghostscript over psFilename and collect the emitted fragments.

    Returns the populated PSDatReader.
    """
    msg( "Reading PS fragments" )
    # %%stdout stays literal so ghostscript (not %-formatting) sees it
    cmd = "gs -q -dNODISPLAY -soutfile=%%stdout %sprescript.ps %s quit.ps" % (
        __main__.prescript_dir, psFilename)
    gspipe = os.popen(cmd)
    fragdata = PSDatReader()
    readPostScriptDataFile(gspipe, fragdata)
    return fragdata
def findFreq( self, list ):
    """Collapse consecutive (x, y) samples with near-equal x into frequency
    records (freq, mean_x, mean_y, min_y, max_y), then keep only the
    dominant x values and merge records that share an x.

    NOTE(review): the parameter shadows the builtin `list`, and `min`/`max`
    below shadow builtins too — left untouched to preserve this Python 2
    code exactly.
    """
    if len(list) == 0: # can't operate on nothing
        msg( "findFreq: WARNING: called with an empty input list!!" )
        return []
    output = [] # output list
    f = 0 # frequency of val
    val = list[0] # start with the first element
    valcum = (0,0) # cumulation of vals
    min = max = val[1] # minimum and maximum y values
    # since val's aren't strictly the same, we'll average the val's to get a
    # more accurate view of what val actually is
    for l in list:
        if misc.isEqual(val[0],l[0],Xequalness):
            f = f + 1 # we've found another val
            valcum = (valcum[0] + l[0], valcum[1] + l[1])
            if l[1] < min:
                min = l[1]
            elif l[1] > max:
                max = l[1]
        else:
            # run ended: flush the averaged record for the previous val
            output.append( (f, round(valcum[0]/f,1),round(valcum[1]/f,1),
                            min, max ) )
            # reset
            val = l # next val
            f = 1 # and we've already found one
            valcum = l
            min = max = val[1]
    # flush the final run
    output.append( (f, round(valcum[0]/f,1),round(valcum[1]/f,1), min, max) )
    output.sort() # sort on frequency (pri) and x (sec)
    # output now contains the list of consecutive frequencies.
    # I found the following few lines of code very difficult to comment,
    # so please excuse the bad explanation.
    #
    # 'output contains a list of (consecutive frequency,value) pairs. That
    # means you can have several records that have the same value, but since
    # they weren't consecutive in the input list, they were not combined.
    #
    # for example, [(1,10),(2,8),(1,10),(1,9),(5,10)]
    #
    # in that example, 10 is the value with a sufficiently high frequency,
    # and so all values less than 10 must be deleted and all (x,10) records
    # must be combined.
    # this code finds the first record whose f is >= MinConsecFreq
    i = 0 # where to cut
    while i < len(output) and output[i][0] < MinConsecFreq:
        i = i + 1 # find first i where output[i] >= MinConsecFreq
    # if i points off the list, no eligible candidates were found.
    # When this happens, we take the largest frequency found, which will
    # be the last record in the list.
    if i == len(output):
        i = i - 1
    # this code backtracks to ensure that any occurrances of the val
    # associated with the i'th record also gets included. Without this,
    # the frequency reported for the val[i] might be too small because
    # some small groups (less than MinConsecFreq) were discounted
    # because their f's were too small. Sort of. Try and make sense
    # of that.
    j = 0
    while j < i:
        if misc.isEqual(output[j][1],output[i][1],Xequalness):
            j = j + 1
        else:
            del output[j]
            i = i - 1
    # output now contains only eligible frequencies which must now be combined
    # ie, [(1,10),(2,10),(1,11)] ==> [(3,10),(1,11)]
    #
    output.sort( lambda a,b: int(a[1] - b[1]) ) # sort on x value
    i = 0
    while i <= len(output) - 2: # go from 0 to second to last index
        if misc.isEqual( output[i][1], output[i+1][1],Xequalness ):
            #abs(output[i][1] - output[i+1][1]) <= 2:
            # merge adjacent records whose x values are (nearly) equal
            output[i] = self.mergeRecords( output[i], output[i+1],
                                           lambda a,b: round((a+b)/2) )
            del output[i+1]
        else:
            i = i + 1
    return output
def preprocessDocument(document):
    """Announce and run the document's preprocessing pass."""
    msg( "Preprocessing document" )
    # delegates all the statistics work to the document itself
    document.preprocess()
def renderDocument(formatter, document):
    """Render every page of the document between formatter start/end hooks."""
    msg( "Rendering document" )
    formatter.start(document)
    for pg in document:
        renderPage(formatter, pg)
    formatter.end(document)
def __init__(self):
    """Connect the (simulated) trading api and attach a fresh Account."""
    msg('connected to api', '+')
    # all trades operate against this account
    self.account = Account()