def generateFilterByRegex(regexStrL, convIdxL):
    """
    Build a record filter that matches selected columns against regexes.

    The i-th compiled regex is matched (with re.match, i.e. anchored at
    the start of the string) against the i-th column selected by convIdxL.

    regexStrL :: [str]
        Regular expression string list.
    convIdxL :: [((str -> ANY), int)]
        Column index list with converter.
        converters will be ignored.
        Specify 1 for first element. 0 means all columns.
    return :: tuple(str) -> bool
        Filter function.

    """
    # Materialize as real lists: len(regexL) is evaluated for every record
    # and the regexes are re-used per record, which a lazy Python 3 `map`
    # object would not support.
    regexL = [re.compile(regexStr) for regexStr in regexStrL]
    # Only the index half of each (converter, index) pair is used.
    idxL = [idx for _, idx in convIdxL]
    project1 = pysows.generateProject(idxL)

    def filterByRegex(rec):
        """
        rec :: tuple(str)
        return :: bool
            True if every projected column matches its regex.
        raise :: IOError
            If the projected record has more columns than there are regexes.

        """
        rec1 = project1(rec)
        if len(regexL) < len(rec1):
            raise IOError("key length %d > number of regex %d."
                          % (len(rec1), len(regexL)))
        # zip() stops at the shorter sequence, so surplus regexes (allowed
        # by the check above) are simply not consulted.
        return all(regex.match(x) is not None
                   for regex, x in zip(regexL, rec1))

    return filterByRegex
def doMain():
    """
    Run the join described by the command-line arguments.

    Parses sys.argv, opens record readers on the left and right inputs,
    builds a hash table from the left reader keyed by the left key columns,
    joins the right reader against it with hashJoin, and prints one output
    record per joined (left, right) pair.

    """
    args = parseOpts(sys.argv[1:])

    # Build a real list (not a lazy map object) so the output column
    # specification can be traversed again for every joined record.
    outColumnIdxes = [prefixToIsLeft(col)
                      for col in getColumnIndexListWithPrefix(args.out_columns)]
    getOutRec = generateGetOutputRecord(outColumnIdxes)

    lReader = pysows.recordReader(args.left_input, args.separator)
    rReader = pysows.recordReader(args.right_input, args.separator)

    lKeyIdxL, rKeyIdxL = getKeyIndexLists(args)
    lGetKey = pysows.generateProject(lKeyIdxL)
    rGetKey = pysows.generateProject(rKeyIdxL)

    hashTable = createHashTable(lReader, lGetKey)
    resultIter = hashJoin(hashTable, rReader, rGetKey)

    for lRec, rRec in resultIter:
        pysows.printList(getOutRec(lRec, rRec))
        # Terminate the record. The original used a bare `print`, which only
        # emits a newline as a Python 2 statement and is a silent no-op
        # expression on Python 3; an explicit write behaves the same on both.
        sys.stdout.write("\n")