import os
import sets
import exceptions

from google.appengine.ext import webapp
from google.appengine.ext.webapp import template
from google.appengine.api import users
from django.utils import simplejson

# Unpicklable statements to seed new sessions with.
INITIAL_UNPICKLABLES = [
    '\n'.join([
        'import sys',
        'sys.path.insert(0, "%s")' % os.path.dirname(os.path.dirname(os.path.dirname((__file__)))),
        'try: from autoexec import *',
        'except ImportError: pass',
        'finally: del sys'
    ])
]

DOCUMENTED_EXCEPTIONS = sets.Set()
for name in dir(exceptions):
    e = getattr(exceptions, name)
    if (type(e) is type) and issubclass(e, exceptions.BaseException):
        DOCUMENTED_EXCEPTIONS.add(e)


def confirm_permission():
    """Raises an exception if the user does not have permission to execute a statement"""
    user = users.get_current_user()
    nologin = NotLoggedInError('Hello! Please $login_link to use this console')
    noadmin = NotAdminError('Please $logout_link, then log in as an administrator')
    if util.is_production():
        if not user:
            raise nologin  # (assumed continuation; the original snippet is truncated here)
def __init__( D, line ):
    ## Initialize associations...
    D.PorQs = collections.defaultdict( list )  ## Key is elem pred.
    D.Scopes = { }  ## Key is outscoped.
    D.Traces = { }  ## Key is outscoped.
    D.Inhs = collections.defaultdict( lambda : collections.defaultdict(float) )  ## Key is inheritor.
    D.Inheriteds = { }
    D.DiscInhs = { }
    D.Referents = [ ]
    D.Taints = { }   ## taint arcs run parallel to 'scotch tape' scopings
    D.Upward1 = { }  ## upward: truthfuncly necessary, possibly not immediate outscoper
    D.Upward2 = { }  ## if we ever need more than 2 outgoing Upwards we'll re-kludge
    ## For each assoc...
    for assoc in sorted( line.split(' ') ):
        src,lbl,dst = assoc.split( ',', 2 )
        if ( dst.startswith('N-bO:') or dst.startswith('N-bN:') or dst.startswith('N-b{N-aD}:') or dst.startswith('N-aD-b{N-aD}:') or dst.startswith('N-aD-b{N-aD}-bN:') ) and not dst.endswith('Q'):
            dst += 'Q'
        D.Referents += [ src ] if lbl=='0' else [ src, dst ]
        if lbl.isdigit():  D.PorQs[src].insert( int(lbl), dst )  ## Add preds and quants.
        elif lbl == 's':   D.Scopes[src] = dst    ## Add scopes.
        elif lbl == 't':   D.Traces[src] = dst    ## Add traces.
        elif lbl == 'm':   D.DiscInhs[src] = dst  ## Add discourse anaphor.
        elif lbl == 'tt':  D.Taints[src] = dst    ## Taint-markers
        elif lbl == 'u':   D.Upward1[src] = dst   ## Unplugged arcs
        elif lbl == 'uu':  D.Upward2[src] = dst
        else:
            D.Inhs[src][lbl] = dst  ## Add inheritances.
            # if lbl == 'r': D.Nuscos[dst].append( src )  ## Index nusco of each restr.
            # if lbl == 'r': D.NuscoValues[src] = True
            if lbl == 'e': D.Inheriteds[dst] = True
    D.PredTuples = [ ]
    D.QuantTuples = [ ]
    ## Distinguish preds and quants...
    for elempred,Particips in D.PorQs.items():
        ## If three participants and last restriction-inherits from previous, it's a quant...
        # if len( Particips ) == 3 and Inhs.get(Particips[2],{}).get('r','') == Particips[1]: QuantTuples.append( tuple( [ Particips[0] ] + [ elempred ] + Particips[1:] ) )
        if Particips[0].endswith('Q'):
            D.QuantTuples.append( tuple( [ Particips[0] ] + [ elempred ] + Particips[1:] + (['_'] if len(Particips)<4 else []) ) )
        else:
            D.PredTuples.append( tuple( [ Particips[0] ] + [ elempred ] + Particips[1:] ) )
    D.OrigConsts = [ (ep[0],ep[1]) for ep in D.PredTuples ] + [ (q[0],'Q') for q in D.QuantTuples ]
    ## Report items...
    if VERBOSE:
        print( 'P = ' + str(sorted(D.PredTuples)) )
        print( 'Q = ' + str(sorted(D.QuantTuples)) )
        print( 'S = ' + str(sorted(D.Scopes.items())) )
    ## Construct list of inheriting refstates...
    D.Subs = collections.defaultdict( list )
    for xLo,lxHi in D.Inhs.items():
        for l,xHi in lxHi.items():
            if l!='w' and l!='o':
                D.Subs[ xHi ].append( xLo )
    D.smite()  # only does smiting now.
    if VERBOSE: print( 'Subs = ' + str(D.Subs) )
    D.Nuscos = collections.defaultdict( list )  ## Key is restrictor.
    D.NuscoValues = { }
    ## Define nuscos after smiting...
    for xLo,lxHi in D.Inhs.items():
        for lbl,xHi in lxHi.items():
            if lbl=='r':
                D.Nuscos[xHi].append( xLo )
                D.NuscoValues[xLo] = True
    # ## List of referents that are or participate in elementary predications...
    # D.Referents = sorted( sets.Set( [ x for pred in D.PredTuples for x in pred[1:] ] + D.Inhs.keys() ) )
    D.Referents = sorted( sets.Set( D.Referents ) )
UNUSED_SOURCES = sets.Set([
    'intl/icu/source/common/bytestrieiterator.cpp',
    'intl/icu/source/common/cstr.cpp',
    'intl/icu/source/common/cwchar.cpp',
    'intl/icu/source/common/icudataver.cpp',
    'intl/icu/source/common/icuplug.cpp',
    'intl/icu/source/common/pluralmap.cpp',
    'intl/icu/source/common/ucat.cpp',
    'intl/icu/source/common/ucnv2022.cpp',
    'intl/icu/source/common/ucnv_ct.cpp',
    'intl/icu/source/common/ucnvdisp.cpp',
    'intl/icu/source/common/ucnv_ext.cpp',
    'intl/icu/source/common/ucnvhz.cpp',
    'intl/icu/source/common/ucnvisci.cpp',
    'intl/icu/source/common/ucnv_lmb.cpp',
    'intl/icu/source/common/ucnvmbcs.cpp',
    'intl/icu/source/common/uidna.cpp',
    'intl/icu/source/common/unorm.cpp',
    'intl/icu/source/common/usc_impl.cpp',
    'intl/icu/source/common/ustr_wcs.cpp',
    'intl/icu/source/common/util_props.cpp',
    'intl/icu/source/i18n/anytrans.cpp',
    'intl/icu/source/i18n/brktrans.cpp',
    'intl/icu/source/i18n/casetrn.cpp',
    'intl/icu/source/i18n/cpdtrans.cpp',
    'intl/icu/source/i18n/esctrn.cpp',
    'intl/icu/source/i18n/fmtable_cnv.cpp',
    'intl/icu/source/i18n/funcrepl.cpp',
    'intl/icu/source/i18n/gender.cpp',
    'intl/icu/source/i18n/name2uni.cpp',
    'intl/icu/source/i18n/nortrans.cpp',
    'intl/icu/source/i18n/nultrans.cpp',
    'intl/icu/source/i18n/quant.cpp',
    'intl/icu/source/i18n/rbt.cpp',
    'intl/icu/source/i18n/rbt_data.cpp',
    'intl/icu/source/i18n/rbt_pars.cpp',
    'intl/icu/source/i18n/rbt_rule.cpp',
    'intl/icu/source/i18n/rbt_set.cpp',
    'intl/icu/source/i18n/regexcmp.cpp',
    'intl/icu/source/i18n/regeximp.cpp',
    'intl/icu/source/i18n/regexst.cpp',
    'intl/icu/source/i18n/regextxt.cpp',
    'intl/icu/source/i18n/rematch.cpp',
    'intl/icu/source/i18n/remtrans.cpp',
    'intl/icu/source/i18n/repattrn.cpp',
    'intl/icu/source/i18n/scientificnumberformatter.cpp',
    'intl/icu/source/i18n/strmatch.cpp',
    'intl/icu/source/i18n/strrepl.cpp',
    'intl/icu/source/i18n/titletrn.cpp',
    'intl/icu/source/i18n/tolowtrn.cpp',
    'intl/icu/source/i18n/toupptrn.cpp',
    'intl/icu/source/i18n/translit.cpp',
    'intl/icu/source/i18n/transreg.cpp',
    'intl/icu/source/i18n/tridpars.cpp',
    'intl/icu/source/i18n/udateintervalformat.cpp',
    'intl/icu/source/i18n/unesctrn.cpp',
    'intl/icu/source/i18n/uni2name.cpp',
    'intl/icu/source/i18n/uregexc.cpp',
    'intl/icu/source/i18n/uregex.cpp',
    'intl/icu/source/i18n/uregion.cpp',
    'intl/icu/source/i18n/uspoof_build.cpp',
    'intl/icu/source/i18n/uspoof_conf.cpp',
    'intl/icu/source/i18n/utrans.cpp',
    'intl/icu/source/i18n/vzone.cpp',
    'intl/icu/source/i18n/zrule.cpp',
    'intl/icu/source/i18n/ztrans.cpp',
    # Cluster
    'intl/icu/source/common/resbund_cnv.cpp',
    'intl/icu/source/common/ures_cnv.cpp',
    # Cluster
    'intl/icu/source/common/propsvec.cpp',
    'intl/icu/source/common/ucnvsel.cpp',
    'intl/icu/source/common/ucnv_set.cpp',
    # Cluster
    'intl/icu/source/common/ubiditransform.cpp',
    'intl/icu/source/common/ushape.cpp',
    # Cluster
    'intl/icu/source/i18n/csdetect.cpp',
    'intl/icu/source/i18n/csmatch.cpp',
    'intl/icu/source/i18n/csr2022.cpp',
    'intl/icu/source/i18n/csrecog.cpp',
    'intl/icu/source/i18n/csrmbcs.cpp',
    'intl/icu/source/i18n/csrsbcs.cpp',
    'intl/icu/source/i18n/csrucode.cpp',
    'intl/icu/source/i18n/csrutf8.cpp',
    'intl/icu/source/i18n/inputext.cpp',
    'intl/icu/source/i18n/ucsdet.cpp',
    # Cluster
    'intl/icu/source/i18n/alphaindex.cpp',
    'intl/icu/source/i18n/ulocdata.cpp',
])
SIMULATOR_SET_CMD = None

""" Current Driver Object socket """
DRIVER = None

""" Determine
0 = PH Control
1 = SIM Control
None = No Client
"""
CONTROL_MODE = None

CLIENT_WHITELIST = sets.Set()
THREAD_POOL = []
HOST = constant.HOST
PHONE_CMD, SIMULATOR_SET_CMD

""" Command Server """
logging.basicConfig(level=logging.DEBUG)


def MainSocket():
    command_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    command_sock.bind((HOST, 7769))
def check_java_token_values_vs_py(dbh_java_fullpath):
    """Compare render.py DB_TOKENS values against those in SOCDBHelper.upgradeSchema"""
    global java_all_ok
    java_all_ok = True
    line_num = 0
    state = ""  # 'state machine' shorthand for next part of the comparison area

    def print_err(msg):
        global java_all_ok
        print(progname + ".check_java_token_values_vs_py: " + dbh_java_fullpath
              + " line " + str(line_num)
              + ": Parse error within COMPARISON AREA, see py source; state=" + state + ": " + msg)
        java_all_ok = False

    try:
        token_names = None
        token_dbtype_vals = { }  # key = dbtype or 'default', val = dict with tokennames & values
        with open(dbh_java_fullpath) as f:
            # Read lines until we see "BEGIN COMPARISON AREA".
            # At that point read and "parse"; ignore comment-only lines.
            # When we see "END COMPARISON AREA" (hopefully at expected time), stop reading.
            f_line = ""
            saw_begin_line = False
            saw_all_expected = False
            curr_case_dbtype = None  # while parsing switch cases; 'default' can be a value here
            while java_all_ok and (f_line is not None):
                f_line = f.readline()
                if not f_line:
                    break  # EOF: readline() returns '' at end of file, never None
                f_line = f_line.strip()
                line_num += 1
                if not len(f_line):
                    continue
                if not saw_begin_line:
                    if f_line == "// BEGIN COMPARISON AREA -- test_token_consistency.py":
                        saw_begin_line = True
                        state = 'decl'
                elif f_line == "// END COMPARISON AREA -- test_token_consistency.py":
                    if not saw_all_expected:
                        print(progname + ': "END COMPARISON AREA" too early (line '
                              + str(line_num) + ' state ' + state + ') in ' + dbh_java_fullpath)
                        java_all_ok = False
                    else:
                        break  # <--- Normal read-loop termination ---
                else:
                    if f_line.startswith("//"):
                        continue
                    if state == 'decl':
                        # assumes 2 or more tokens are declared, all on same line
                        if f_line.startswith("final String "):
                            m = re.search(r"String\s+(\w+(,\s*\w+)+)\s*;", f_line)
                            if m:
                                token_names = sets.Set([tokname.strip() for tokname in m.group(1).split(',')])
                                state = 'switch'
                            else:
                                print_err("failed regex match: final String ...")
                        else:
                            print_err("expected: final String")
                    elif state == 'switch':
                        if re.search(r"^switch\w*\(dbType\)$", f_line):
                            state = '{'
                        else:
                            print_err("failed regex match")
                    elif state == '{':
                        if f_line == '{':
                            state = 'case'  # expects case:, default:, or '}'
                    elif state == 'case':
                        if f_line == '}':
                            state = 'end'
                            saw_all_expected = True
                        elif f_line == 'default:':
                            state = 'caseline'
                            curr_case_dbtype = 'default'
                            token_dbtype_vals[curr_case_dbtype] = {}
                        else:
                            m = re.search(r"^case\s+DBTYPE_(\w+)\s*:", f_line)
                            if m:
                                state = 'caseline'
                                curr_case_dbtype = m.group(1).lower()
                                if curr_case_dbtype == 'postgresql':
                                    curr_case_dbtype = 'postgres'
                                token_dbtype_vals[curr_case_dbtype] = {}
                            else:
                                print_err("failed regex match: case DBTYPE_...")
                    elif state == 'caseline':
                        if f_line == 'break;':
                            state = 'case'
                        else:
                            m = re.search(r'^(\w+)\s*=\s*"([^"]*)";\s*$', f_line)
                            if m:
                                token_dbtype_vals[curr_case_dbtype][m.group(1)] = m.group(2)
                            else:
                                print_err("failed regex match: var assign | break")
                    elif state == 'end':
                        print_err("expected: END COMPARISON AREA")

        if not saw_begin_line:
            print(progname + ': Missing "BEGIN COMPARISON AREA" in ' + dbh_java_fullpath)
            java_all_ok = False

        # Check if all dbtypes (including default) have the same set of token_names
        for dbtype in token_dbtype_vals.keys():
            diffr = (token_names ^ (sets.ImmutableSet(token_dbtype_vals[dbtype].keys())))
            if len(diffr):
                java_all_ok = False
                print(progname + ": SOCDBHelper.upgradeSchema token sets differ: String declaration vs dbtype "
                      + dbtype + ": " + ", ".join(diffr))

        # Check that dbtypes here (besides default) are same as render.DB_TOKENS
        dbtypes_set = sets.Set(token_dbtype_vals.keys())
        dbtypes_set.remove('default')
        diffr = (dbtypes_set ^ (sets.ImmutableSet(render.DB_TOKENS.keys())))
        if len(diffr):
            java_all_ok = False
            print(progname + ": SOCDBHelper.upgradeSchema db types differ vs render.DB_TOKENS: "
                  + ", ".join(diffr))

        # For java token names, check token values vs render.DB_TOKENS for non-default dbtypes
        if java_all_ok:
            for dbtype in dbtypes_set:
                for token_name in token_names:
                    if render.DB_TOKENS[dbtype][token_name] != token_dbtype_vals[dbtype][token_name]:
                        if java_all_ok:
                            print(progname + ": SOCDBHelper.upgradeSchema token value differs from render.DB_TOKENS:")
                            java_all_ok = False
                        print("- DBTYPE_" + dbtype.upper() + ": token " + token_name)

        return java_all_ok
    except IOError as e:
        print(progname + ": Error reading " + dbh_java_fullpath + ": " + str(e))
        return False
def set_merge(set1, set2):
    return list(sets.Set(set1) | sets.Set(set2))
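# Illustrative usage sketch (not part of the original source): set_merge is a
# de-duplicating union; note that the sets module does not preserve input order.
#     set_merge([1, 2, 2, 3], [3, 4])   # -> [1, 2, 3, 4], in arbitrary order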
def main(metadata_path, output_path, print_source=False):
    metadata_path = os.path.abspath(metadata_path)
    metadata_dir = os.path.dirname(metadata_path)

    meta_loader = MetaLoader()
    data = meta_loader.load(metadata_path)

    action_name = data['name']
    entry_point = data['entry_point']

    workflow_metadata_path = os.path.join(metadata_dir, entry_point)
    chainspec = meta_loader.load(workflow_metadata_path)
    chain_holder = ChainHolder(chainspec, 'workflow')

    graph_label = '%s action-chain workflow visualization' % (action_name)
    graph_attr = {
        'rankdir': 'TD',
        'labelloc': 't',
        'fontsize': '15',
        'label': graph_label
    }
    node_attr = {}
    dot = Digraph(comment='Action chain work-flow visualization',
                  node_attr=node_attr, graph_attr=graph_attr, format='png')
    # dot.body.extend(['rankdir=TD', 'size="10,5"'])

    # Add all nodes
    node = chain_holder.get_next_node()
    while node:
        dot.node(node.name, node.name)
        node = chain_holder.get_next_node(curr_node_name=node.name)

    # Add connections
    node = chain_holder.get_next_node()
    processed_nodes = sets.Set([node.name])
    nodes = [node]
    while nodes:
        previous_node = nodes.pop()
        success_node = chain_holder.get_next_node(curr_node_name=previous_node.name,
                                                  condition='on-success')
        failure_node = chain_holder.get_next_node(curr_node_name=previous_node.name,
                                                  condition='on-failure')

        # Add success node (if any)
        if success_node:
            dot.edge(previous_node.name, success_node.name, constraint='true',
                     color='green', label='on success')
            if success_node.name not in processed_nodes:
                nodes.append(success_node)
                processed_nodes.add(success_node.name)

        # Add failure node (if any)
        if failure_node:
            dot.edge(previous_node.name, failure_node.name, constraint='true',
                     color='red', label='on failure')
            if failure_node.name not in processed_nodes:
                nodes.append(failure_node)
                processed_nodes.add(failure_node.name)

    if print_source:
        print(dot.source)

    if output_path:
        output_path = os.path.join(output_path, action_name)
    else:
        output_path = output_path or os.path.join(os.getcwd(), action_name)

    dot.format = 'png'
    dot.render(output_path)
    print('Graph saved at %s' % (output_path + '.png'))
def __init__(self, name):
    self.name = name
    self.file = open(name + ".txt", "wt")
    self.filenames = sets.Set()
    self.index = 0
def reset():
    global _directories
    _directories = sets.Set()
def __init__( D, line ):
    discgraph.DiscGraph.__init__( D, line )
    ## List of referents that participate in elementary predications (which does not include the eventuality / elementary predication itself)...
    D.Participants = sets.Set([ x for pred in D.PredTuples for x in pred[2:] ])
    ## List of heirs for each inherited referent...
    D.Legators = collections.defaultdict( list )
    D.Heirs = collections.defaultdict( list )
    for xLo in D.Referents:
        D.Legators[ xLo ] = D.getLegators( xLo )
    for xHi in D.Referents:
        D.Heirs[ xHi ] = D.getHeirs( xHi )
    if VERBOSE: print( 'Legators = ' + str(D.Legators) )
    if VERBOSE: print( 'Heirs = ' + str(D.Heirs) )

    def getTopUnaryLegators( xLo ):
        L = [ xLeg for l,xHi in D.Inhs.get( xLo, {} ).items() if l!='w' and l!='o' and len( D.Subs.get(xHi,[]) ) < 2 for xLeg in getTopUnaryLegators(xHi) ]
        return L if L != [] else [ xLo ]
        # if D.Inhs.get( xLo, {} ).items() != [] else [ xLo ]
        # UnaryL = [ xLeg for xLeg in D.Legators.get(xLo,[]) if all([ xLo in D.Heirs.get(xHeir,[]) for xHeir in D.Legators.get(xLo,[]) if xHeir in D.Heirs.get(xLeg,[]) ]) ]
        # return [ x for x in UnaryL if not any([ x in D.Heirs.get(y,[]) for y in UnaryL if y != x ]) ]

    def getTopLegators( xLo ):
        L = [ xLeg for l,xHi in D.Inhs.get( xLo, {} ).items() if l!='w' and l!='o' for xLeg in getTopLegators(xHi) ]
        return L if L != [] else [ xLo ]
        # if D.Inhs.get( xLo, {} ).items() != [] else [ xLo ]

    D.TopLegators = { xLo : sets.Set( getTopLegators(xLo) ) for xLo in D.Inhs }
    if VERBOSE: print( 'TopLegators = ' + str(D.TopLegators) )
    D.TopUnaryLegators = { xLo : sets.Set( getTopUnaryLegators(xLo) ) for xLo in D.Inhs }
    if VERBOSE: print( 'TopUnaryLegators = ' + str(D.TopUnaryLegators) )
    # D.PredRecency = { }
    ## List of heirs for each participant...
    D.HeirsOfParticipants = [ xLo for xHi in D.Participants for xLo in D.Heirs.get(xHi,[]) ]
    if VERBOSE: print( 'HeirsOfParticipants = ' + str(D.HeirsOfParticipants) )
    ## Obtain inheritance chain for each reft...
    D.Chains = { x : sets.Set( D.getChainFromSup(x) + D.getChainFromSub(x) ) for x in D.Referents }
    if VERBOSE: print( 'Chains = ' + str(D.Chains) )
    # Inheritances = { x : sets.Set( getChainFromSup(x) ) for x in Referents }

    ## Mapping from referent to elementary predications containing it...
    # D.RefToPredTuples = { xOrig : [ (ptup,xInChain) for xInChain in D.Chains[xOrig] for ptup in D.PredTuples if xInChain in ptup[2:] ] for xOrig in D.Referents }
    def orderTuplesFromSups( x ):
        Out = []
        if x in D.Nuscos:
            for src in D.Nuscos[x]:
                Out += [ (ptup,src) for ptup in D.PredTuples if src in ptup[2:] ]
        Out += [ (ptup,x) for ptup in D.PredTuples if x in ptup[2:] ]
        for lbl,dst in D.Inhs.get(x,{}).items():
            Out += orderTuplesFromSups( dst )
        return Out

    def orderTuplesFromSubs( x ):
        Out = []
        Out += [ (ptup,x) for ptup in D.PredTuples if x in ptup[2:] ]
        for src in D.Subs.get(x,[]):
            Out += orderTuplesFromSubs( src )
            # Out += [ (ptup,src) for ptup in D.PredTuples if src in ptup[2:] ]
        return Out

    D.FullRefToPredTuples = { x : sets.Set( orderTuplesFromSubs(x) + orderTuplesFromSups(x) ) for x in D.Referents }
    D.WeakRefToPredTuples = { x : orderTuplesFromSubs( D.Inhs.get(x,{}).get('r',x) ) for x in D.Referents }
    D.BareRefToPredTuples = { x : [ (ptup,x) for ptup in D.PredTuples if x in ptup[2:] ] for x in D.Referents }
    if VERBOSE: print( 'FullRefToPredTuples = ' + str(D.FullRefToPredTuples) )
    if VERBOSE: print( 'WeakRefToPredTuples = ' + str(D.WeakRefToPredTuples) )
    if VERBOSE: print( 'BareRefToPredTuples = ' + str(D.BareRefToPredTuples) )

    def constrainingTuplesFromSups( x ):
        return [ ptup for ptup in D.PredTuples if x in ptup[1:] ] + [ ptup for _,xHi in D.Inhs.get(x,{}).items() for ptup in constrainingTuplesFromSups( xHi ) ]

    def constrainingTuplesFromSubs( x ):
        return [ ptup for ptup in D.PredTuples if x in ptup[1:] ] + [ ptup for xLo in D.Subs.get(x,[]) for ptup in constrainingTuplesFromSubs( xLo ) ]

    D.ConstrainingTuples = { x : sets.Set( constrainingTuplesFromSups(x) + constrainingTuplesFromSubs(x) ) for x in D.Referents }

    ## Calculate ceilings of scoped refts...
    # D.AnnotatedCeilings = sets.Set([ y for y in D.Referents for x in D.Scopes.keys() if D.ceiling(x) in D.Chains[y] ])  # D.Chains[D.ceiling(x)] for x in D.Scopes.keys() ])
    # if len(D.AnnotatedCeilings) == 0:
    #     D.AnnotatedCeilings = sets.Set( sorted([ (len(chain),chain) for x,chain in D.Chains.items() if x.startswith('000') ])[-1][1] )  # sets.Set(D.Chains['0001s'])
    #     print( '#NOTE: Discourse contains no scope annotations -- defining root as longest chain through first sentence: ' + str(sorted(D.AnnotatedCeilings)) )
    #     sys.stderr.write( 'NOTE: Discourse contains no scope annotations -- defining root as longest chain through first sentence: ' + str(sorted(D.AnnotatedCeilings)) + '\n' )
    # DisjointCeilingPairs = [ (x,y) for x in D.AnnotatedCeilings for y in D.AnnotatedCeilings if x<y and not D.reachesInChain( x, y ) ]
    # if len(DisjointCeilingPairs) > 0:
    #     print( '#WARNING: Maxima of scopal annotations are disjoint: ' + str(DisjointCeilingPairs) + ' -- disconnected annotations cannot all be assumed dominant.' )
    #     sys.stderr.write( 'WARNING: Maxima of scopal annotations are disjoint: ' + str(DisjointCeilingPairs) + ' -- disconnected annotations cannot all be assumed dominant.\n' )
    # if VERBOSE: print( 'AnnotatedCeilings = ' + str(D.AnnotatedCeilings) )
    # D.NotOutscopable = [ x for x in D.Referents if D.ceiling(x) in D.AnnotatedCeilings ]
    # if VERBOSE: print( 'NotOutscopable = ' + str(D.NotOutscopable) )

    D.PredToTuple = { xOrig : ptup for ptup in D.PredTuples for xOrig in D.Chains[ ptup[1] ] }
    if VERBOSE: print( 'PredToTuple = ' + str(D.PredToTuple) )

    def allInherited( src ):
        Out = []
        for lbl,dst in D.Inhs.get(src,{}).items():
            if lbl!='w' and lbl!='o':
                Out += [ dst ] + allInherited( dst )
        return Out

    D.AllInherited = { x : allInherited( x ) for x in D.Referents }
    if VERBOSE: print( 'AllInherited = ' + str(D.AllInherited) )
        if line.startswith(" -- adding "):
            sys.stdout.write(".")
        else:
            sys.stdout.write(line)
        sys.stdout.flush()
        if not os.path.exists(self.name + ".cab"):
            raise IOError, "cabarc failed"
        add_data(db, "Media", [(1, self.index, None, "#" + self.name, None, None)])
        add_stream(db, self.name, self.name + ".cab")
        os.unlink(self.name + ".txt")
        os.unlink(self.name + ".cab")
        db.Commit()

_directories = sets.Set()


class Directory:
    def __init__(self, db, cab, basedir, physical, _logical, default, componentflags=None):
        """Create a new directory in the Directory table. There is a current
        component at each point in time for the directory, which is either
        explicitly created through start_component, or implicitly when files
        are added for the first time. Files are added into the current
        component, and into the cab file."""
def getCeils( D, xHi ):
    # print( 'ceil of ' + xHi )
    return D.getCeils( D.Scopes[xHi] ) if xHi in D.Scopes \
        else sets.Set([ y for xLo in D.Subs.get(xHi,[]) for y in D.getCeils(xLo) ]) if len(D.Subs.get(xHi,[])) > 0 \
        else [ xHi ]
def carregarDadosCVLattes(self):
    cvPath = self.diretorioCache + '/' + self.idLattes

    if 'xml' in cvPath:
        arquivoX = open(cvPath)
        cvLattesXML = arquivoX.read()
        arquivoX.close()

        extended_chars = u''.join(unichr(c) for c in xrange(127, 65536, 1))  # srange(r"[\0x80-\0x7FF]")
        special_chars = ' -' ''
        cvLattesXML = cvLattesXML.decode('iso-8859-1', 'replace') + extended_chars + special_chars
        parser = ParserLattesXML(self.idMembro, cvLattesXML)
        self.idLattes = parser.idLattes
        self.url = parser.url
        print "(*) Utilizando CV armazenado no cache: " + cvPath
    else:
        if os.path.exists(cvPath):
            # Use the CV stored in the local cache
            arquivoH = open(cvPath)
            cvLattesHTML = arquivoH.read()
            if self.idMembro != '':
                print "(*) Utilizando CV armazenado no cache: " + cvPath
        else:
            # Download the CV, retrying up to 5 times
            cvLattesHTML = ''
            tentativa = 0
            while tentativa < 5:  # while True:
                try:
                    txdata = None
                    txheaders = {
                        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0) Gecko/20100101 Firefox/4.0',
                        'Accept-Language': 'en-us,en;q=0.5',
                        'Accept-Encoding': 'deflate',
                        'Keep-Alive': '115',
                        'Connection': 'keep-alive',
                        'Cache-Control': 'max-age=0',
                        'Cookie': 'style=standard; __utma=140185953.294397416.1313390179.1313390179.1317145115.2; __utmz=140185953.1317145115.2.2.utmccn=(referral)|utmcsr=emailinstitucional.cnpq.br|utmcct=/ei/emailInstitucional.do|utmcmd=referral; JSESSIONID=1B98ABF9642E01597AABA0F7A8807FD1.node2',
                    }
                    print "Baixando CV :" + self.url
                    req = urllib2.Request(self.url, txdata, txheaders)  # Young folks by P,B&J!
                    arquivoH = urllib2.urlopen(req)
                    cvLattesHTML = arquivoH.read()
                    arquivoH.close()
                    time.sleep(1)
                    if len(cvLattesHTML) <= 2000:
                        print '[AVISO] O scriptLattes tentará baixar novamente o seguinte CV Lattes: ', self.url
                        time.sleep(30)
                        tentativa += 1
                        continue
                    if not self.diretorioCache == '':
                        file = open(cvPath, 'w')
                        file.write(cvLattesHTML)
                        file.close()
                        print " (*) O CV está sendo armazenado no Cache"
                    break
                ### except urllib2.URLError: ###, e:
                except:
                    print '[AVISO] Nao é possível obter o CV Lattes: ', self.url
                    print '[AVISO] Certifique-se que o CV existe. O scriptLattes tentará baixar o CV em 30 segundos...'
                    ### print '[ERRO] Código de erro: ', e.code
                    time.sleep(30)
                    tentativa += 1
                    continue

        extended_chars = u''.join(unichr(c) for c in xrange(127, 65536, 1))  # srange(r"[\0x80-\0x7FF]")
        special_chars = ' -' ''
        # cvLattesHTML = cvLattesHTML.decode('ascii','replace')+extended_chars+special_chars  # Wed Jul 25 16:47:39 BRT 2012
        cvLattesHTML = cvLattesHTML.decode('iso-8859-1', 'replace') + extended_chars + special_chars
        parser = ParserLattes(self.idMembro, cvLattesHTML)

        p = re.compile('[a-zA-Z]+')
        if p.match(self.idLattes):
            self.identificador10 = self.idLattes
        self.idLattes = parser.identificador16
        self.url = 'http://lattes.cnpq.br/' + self.idLattes

    # -----------------------------------------------------------------------------------------
    # Collect all the data from the Lattes CV
    self.nomeCompleto = parser.nomeCompleto
    self.bolsaProdutividade = parser.bolsaProdutividade
    self.enderecoProfissional = parser.enderecoProfissional
    self.sexo = parser.sexo
    self.nomeEmCitacoesBibliograficas = parser.nomeEmCitacoesBibliograficas
    self.atualizacaoCV = parser.atualizacaoCV
    self.textoResumo = parser.textoResumo
    self.foto = parser.foto

    self.listaIDLattesColaboradores = parser.listaIDLattesColaboradores
    self.listaFormacaoAcademica = parser.listaFormacaoAcademica
    self.listaProjetoDePesquisa = parser.listaProjetoDePesquisa
    self.listaAreaDeAtuacao = parser.listaAreaDeAtuacao
    self.listaIdioma = parser.listaIdioma
    self.listaPremioOuTitulo = parser.listaPremioOuTitulo
    self.listaIDLattesColaboradoresUnica = sets.Set(self.listaIDLattesColaboradores)

    # Bibliographic production
    self.listaArtigoEmPeriodico = parser.listaArtigoEmPeriodico
    self.listaLivroPublicado = parser.listaLivroPublicado
    self.listaCapituloDeLivroPublicado = parser.listaCapituloDeLivroPublicado
    self.listaTextoEmJornalDeNoticia = parser.listaTextoEmJornalDeNoticia
    self.listaTrabalhoCompletoEmCongresso = parser.listaTrabalhoCompletoEmCongresso
    self.listaResumoExpandidoEmCongresso = parser.listaResumoExpandidoEmCongresso
    self.listaResumoEmCongresso = parser.listaResumoEmCongresso
    self.listaArtigoAceito = parser.listaArtigoAceito
    self.listaApresentacaoDeTrabalho = parser.listaApresentacaoDeTrabalho
    self.listaOutroTipoDeProducaoBibliografica = parser.listaOutroTipoDeProducaoBibliografica

    # Technical production
    self.listaSoftwareComPatente = parser.listaSoftwareComPatente
    self.listaSoftwareSemPatente = parser.listaSoftwareSemPatente
    self.listaProdutoTecnologico = parser.listaProdutoTecnologico
    self.listaProcessoOuTecnica = parser.listaProcessoOuTecnica
    self.listaTrabalhoTecnico = parser.listaTrabalhoTecnico
    self.listaOutroTipoDeProducaoTecnica = parser.listaOutroTipoDeProducaoTecnica

    # Patents and registrations
    self.listaPatente = parser.listaPatente
    self.listaProgramaComputador = parser.listaProgramaComputador
    self.listaDesenhoIndustrial = parser.listaDesenhoIndustrial

    # Artistic production
    self.listaProducaoArtistica = parser.listaProducaoArtistica

    # Ongoing advising
    self.listaOASupervisaoDePosDoutorado = parser.listaOASupervisaoDePosDoutorado
    self.listaOATeseDeDoutorado = parser.listaOATeseDeDoutorado
    self.listaOADissertacaoDeMestrado = parser.listaOADissertacaoDeMestrado
    self.listaOAMonografiaDeEspecializacao = parser.listaOAMonografiaDeEspecializacao
    self.listaOATCC = parser.listaOATCC
    self.listaOAIniciacaoCientifica = parser.listaOAIniciacaoCientifica
    self.listaOAOutroTipoDeOrientacao = parser.listaOAOutroTipoDeOrientacao

    # Completed advising
    self.listaOCSupervisaoDePosDoutorado = parser.listaOCSupervisaoDePosDoutorado
    self.listaOCTeseDeDoutorado = parser.listaOCTeseDeDoutorado
    self.listaOCDissertacaoDeMestrado = parser.listaOCDissertacaoDeMestrado
    self.listaOCMonografiaDeEspecializacao = parser.listaOCMonografiaDeEspecializacao
    self.listaOCTCC = parser.listaOCTCC
    self.listaOCIniciacaoCientifica = parser.listaOCIniciacaoCientifica
    self.listaOCOutroTipoDeOrientacao = parser.listaOCOutroTipoDeOrientacao

    # Events
    self.listaParticipacaoEmEvento = parser.listaParticipacaoEmEvento
    self.listaOrganizacaoDeEvento = parser.listaOrganizacaoDeEvento
def __init__(self, arquivo):
    self.arquivoConfiguracao = arquivo
    self.carregarParametrosPadrao()

    # update the parameter list
    for linha in fileinput.input(self.arquivoConfiguracao):
        linha = linha.replace("\r", "")
        linha = linha.replace("\n", "")
        linhaPart = linha.partition("#")  # strip comments
        linhaDiv = linhaPart[0].split("=", 1)
        if len(linhaDiv) == 2:
            self.atualizarParametro(linhaDiv[0], linhaDiv[1])

    # load the global period
    ano1 = self.obterParametro('global-itens_desde_o_ano')
    ano2 = self.obterParametro('global-itens_ate_o_ano')
    if ano1.lower() == 'hoje':
        ano1 = str(datetime.datetime.now().year)
    if ano2.lower() == 'hoje':
        ano2 = str(datetime.datetime.now().year)
    if ano1 == '':
        ano1 = '0'
    if ano2 == '':
        ano2 = '10000'
    self.itemsDesdeOAno = int(ano1)
    self.itemsAteOAno = int(ano2)

    self.diretorioCache = self.obterParametro('global-diretorio_de_armazenamento_de_cvs')
    if self.diretorioCache == '':
        self.diretorioCache = os.path.expanduser(os.path.join("~", ".scriptLattes", "cacheCV"))
    util.criarDiretorio(self.diretorioCache)

    self.diretorioDoi = self.obterParametro('global-diretorio_de_armazenamento_de_doi')
    if self.diretorioDoi == '':
        self.diretorioDoi = os.path.expanduser(os.path.join("~", ".scriptLattes", "cacheDoi"))
    util.criarDiretorio(self.diretorioDoi)

    # load the member list
    entrada = buscarArquivo(self.obterParametro('global-arquivo_de_entrada'))
    idSequencial = 0
    for linha in fileinput.input(entrada):
        linha = linha.replace("\r", "")
        linha = linha.replace("\n", "")
        linhaPart = linha.partition("#")  # strip comments
        linhaDiv = linhaPart[0].split(",")
        if not linhaDiv[0].strip() == '':
            identificador = linhaDiv[0].strip() if len(linhaDiv) > 0 else ''
            nome = linhaDiv[1].strip() if len(linhaDiv) > 1 else ''
            periodo = linhaDiv[2].strip() if len(linhaDiv) > 2 else ''
            rotulo = linhaDiv[3].strip() if len(linhaDiv) > 3 and not linhaDiv[3].strip() == '' else '[Sem rotulo]'
            # rotulo = rotulo.capitalize()

            # initial values assigned for each member
            ### if 'xml' in identificador.lower():
            ######     self.listaDeMembros.append(Membro(idSequencial, '', nome, periodo, rotulo, self.itemsDesdeOAno, self.itemsAteOAno, xml=identificador))
            ###     self.listaDeMembros.append(Membro(idSequencial, identificador, nome, periodo, rotulo, self.itemsDesdeOAno, self.itemsAteOAno, diretorioCache))
            ### else:
            self.listaDeMembros.append(Membro(idSequencial, identificador, nome, periodo, rotulo,
                                              self.itemsDesdeOAno, self.itemsAteOAno, self.diretorioCache))
            self.listaDeRotulos.append(rotulo)
            idSequencial += 1

    self.listaDeRotulos = list(sets.Set(self.listaDeRotulos))  # unique list of labels
    self.listaDeRotulos.sort()
    self.listaDeRotulosCores = [''] * len(self.listaDeRotulos)

    self.qualis = Qualis(self)  # load Qualis from the files defined in the configuration file
            return None
        else:
            parentPath, _ = os.path.split(self.path)
            return self.__class__(parentPath, self.dn.up())

    def _sync_children(self):
        children = []
        try:
            filenames = os.listdir(self.path)
        except OSError, e:
            if e.errno == errno.ENOENT:
                pass
            else:
                raise
        else:
            seen = sets.Set()
            for fn in filenames:
                base, ext = os.path.splitext(fn)
                if ext not in ['.dir', '.ldif']:
                    continue
                if base in seen:
                    continue
                seen.add(base)
                dn = distinguishedname.DistinguishedName(listOfRDNs=(
                    (distinguishedname.RelativeDistinguishedName(base), )
                    + self.dn.split()))
                e = self.__class__(os.path.join(self.path, base + '.dir'), dn)
                children.append(e)
        return children
class MetaLowRankBandit(object):
    '''
    classdocs
    '''

    def __init__(self):
        '''
        Constructor
        '''

    def check(self, set1):
        testset = self.bestAction
        print "BestSet: " + str(testset)
        print "TestSet: " + str(set1)
        if len(set1) < len(testset) or len(set1) > len(testset):
            return False
        for i in range(0, len(set1)):
            # if set[i]!=testset[i] or set1[i]!=testset[i]:
            if set1[i] != testset[i]:
                return False
        return True

    def User_Nature(self):
        # ROUND ROBIN
        return self.t % self.users
        # UNIFORM SAMPLING
        # return random.randint(0, self.users-1)

    def rewards(self, user, choice):
        # Noise Free
        # return self.means[user][choice]
        # Noisy
        # return random.gauss(self.means[user][choice], 0.25)
        return sum(numpy.random.binomial(1, self.means[user][choice], 1)) / 1.0

    def upperBound(self, numPlays):
        # return 0.0
        alpha = 2.0
        psi = 2.0
        if numPlays == 0:
            return self.MAX
        else:
            return math.sqrt(alpha * (math.log((psi * self.numRounds) / (self.t + 1)) / (numPlays)))

    # Read Environment
    def readenv(self, readfile):
        data = []
        # filename = "env/env1/AP" + str(p) + ".txt"
        filename = readfile
        for line in fileinput.input([filename]):
            try:
                line1 = [line.split(", ") or line.split("\n")]
                # print numpy.shape(line1)
                # print line1
                take = []
                for i in range(len(line1[0])):
                    take.append(float(line1[0][i]))
                # print take
                data.append(take)
            except ValueError, e:
                print e
        # print data
        for i in range(0, self.users):
            self.means[i] = (data[i])
            self.bestAction[i] = max(range(self.numActions), key=lambda j: self.means[i][j])
        # self.variance = (data[1])
        # print self.means
        # print self.variance
        print self.bestAction
        take = sets.Set(self.bestAction)
        sum1 = []
        for col in take:
            count = 0
            for col1 in range(0, len(self.bestAction)):
                if col == self.bestAction[col1]:
                    count = count + 1
            sum1.append(count)
        print take, sum1
        return True
    except(IOError):
        return None

if len(sys.argv) != 2:
    print "\n\tUsage: ./alphadbgen.py <wordlist>"
    print "\n\tEx: ./alphadbgen.py wordlist.txt\n"
    sys.exit(1)

try:
    words = open(sys.argv[1], "r").readlines()
except(IOError), msg:
    print "[-] Error:", msg, "\n"
    sys.exit(1)

words = list(sets.Set(words))
print "\n[+] Length:", len(words), "\n"

for word in words:
    word = word.replace("\n", "")
    hash = md5gen(word)
    if testdup(hash) == None:
        print hash + ":" + word
        db = open(hash[0] + ".txt", "a")
        print "Writing:", hash[0] + ".txt"
        db.writelines(hash + ":" + word + "\n")
        db.close()
    else:
        print "Duplicate Found:", hash + ":" + word

print "\n[+] Databases Complete\n"
def MetaLowRankBandit(self, users, numActions, rank, readfile, writefile):
    # Set the environment
    self.MAX = 99999.0
    self.MIN = -99999.0
    self.numActions = numActions
    self.users = users
    self.rank = rank
    self.explore = pow(self.rank, 3)
    self.payoffSums = [[0.0 for i in range(0, self.numActions)] for j in range(0, self.users)]
    self.numPlays = [[0 for i in range(0, self.numActions)] for j in range(0, self.users)]
    self.estR = [[0.0 for i in range(0, self.numActions)] for j in range(0, self.users)]
    self.theta = [[0.0 for i in range(0, self.numActions)] for j in range(0, self.users)]
    self.ucbs = [[0.0 for i in range(0, self.numActions)] for j in range(0, self.users)]
    self.colReward = [0.0 for i in range(0, self.numActions)]
    # self.upbs = [0] * self.numActions
    # self.numRounds = 3000000
    self.numRounds = 4000000
    # numRounds = 250000
    self.arm_reward = [[0.0 for i in range(0, self.numActions)] for j in range(0, self.users)]
    self.bestAction = [0 for i in range(0, self.users)]
    self.bestSet = [0 for i in range(0, self.users)]
    self.means = [[0.0 for i in range(0, self.numActions)] for j in range(0, self.users)]
    '''
    for user in range(self.users):
        for col in range(self.numActions):
            # theta[i]=random.gauss((self.payoffSums[i]/(self.numPlays[i]+1)), (1.0/(self.numPlays[i]+1)))
            self.theta[user][col] = random.betavariate((self.payoffSums[user][col]+1.0), (self.numPlays[user][col]-self.payoffSums[user][col]+1.0))
    '''
    # Read the environment
    self.readenv(readfile)
    print self.means
    print self.bestAction

    self.t = 0
    # self.gamma = 1/math.sqrt(self.t+1)
    # self.gamma = 0.01
    self.gamma = math.sqrt((self.numActions * math.log(self.numActions)) / self.numRounds)
    self.weights = [[1.0 for i in range(0, self.numActions)] for j in range(0, self.rank)]
    self.prob = [[0.0 for i in range(0, self.numActions)] for j in range(0, self.rank)]
    for bandit in range(0, self.rank):
        for col in range(0, self.numActions):
            self.prob[bandit][col] = (1.0 - self.gamma) * (self.weights[bandit][col] / (sum(self.weights[bandit]))) + (self.gamma / self.numActions)
    # self.cumulativeColReward = [0.0 for i in range(0, self.numActions)]
    self.cumulativeReward = 0
    self.bestActionCumulativeReward = 0
    self.actionRegret = []

    self.bestActionSet = []
    while True:
        action = random.randint(0, self.numActions - 1)
        if action not in self.bestActionSet:
            self.bestActionSet.append(action)
        if len(self.bestActionSet) >= self.rank:
            break
    print self.bestActionSet

    count = 0
    self.R = []
    self.a = []
    while True:
        self.user_nature = self.User_Nature()
        # self.action = random.choice(self.bestActionSet)
        for col in range(0, self.numActions):
            if col in self.bestActionSet:
                self.ucbs[self.user_nature][col] = self.estR[self.user_nature][col] + self.upperBound(self.numPlays[self.user_nature][col])
        # print self.ucbs[self.user_nature], self.numPlays[self.user_nature]
        max_val = self.MIN
        max_index = -1
        for col in range(0, self.numActions):
            if col in self.bestActionSet:
                if max_val < self.ucbs[self.user_nature][col]:
                    max_val = self.ucbs[self.user_nature][col]
                    max_index = col
        self.action = max_index
        # self.action = max(range(0, self.numActions), key=lambda col: self.ucbs[self.user_nature][col])
        self.select_Col()
        self.a.append(self.action)
        self.aS = sets.Set(self.a)
        # count = count + 1
        if len(self.aS) >= self.rank:
            # print len(self.aS), self.aS
            # print self.a, self.R
            # self.aS = sorted(self.aS)
            self.changeWeight()
            while True:
                self.metaBanditEXP3_1()
                # self.bestActionSet = sorted(self.bestActionSet)
                if len(self.bestActionSet) >= self.rank:
                    break
            count = 0
            self.R = []
            self.a = []
        # print t
        if self.t % 1000 == 0:
            print "At time: " + str(self.t), ", action: " + str(self.action), ", best: " + str(self.bestAction[self.user_nature]), ", regret:", str(self.regret)
            print self.bestActionSet
            # print self.prob
            # print sum(self.prob[0]), sum(self.prob[1])
            '''
            print self.weights
            # print self.colReward
            '''
        if self.t >= self.numRounds:
            break

    # action = max(range(self.numActions), key=lambda i: self.arm_reward[i])
    # print self.arm_reward
    for user in range(0, self.users):
        self.bestSet[user] = max(range(0, self.numActions), key=lambda i: self.numPlays[user][i])

    f = open(writefile + 'testRegretMetaEXP0RR5.txt', 'a')
    for r in range(len(self.actionRegret)):
        f.write(str(self.actionRegret[r]) + "\n")
    f.close()

    return self.cumulativeReward, self.bestActionCumulativeReward, self.regret, self.action, self.t, self.bestSet
def Gen(fn_pair):
    try:
        start_time = time.time()
        # players_00433_20160726155140
        # 0123456789012345678901234567
        ts_lower = datetime.datetime.strptime(fn_pair[0][16:], "%y%m%d%H%M%S")
        if fn_pair[1] is None:
            ts_upper = None
        else:
            ts_upper = datetime.datetime.strptime(fn_pair[1][16:], "%y%m%d%H%M%S")
        ts_base = None
        fn1 = "%s/%s" % (_dn_in, fn_pair[0])
        fn_out = "%s/%s" % (_dn_out, fn_pair[0][16:22] + "-" + fn_pair[0][22:])
        ts_prev = None
        # Object IDs with the same timestamp
        obj_ids_w = sets.Set()
        obj_ids_r = sets.Set()
        num_skipped = 0
        with open(fn1) as fo_in, open(fn_out, "w") as fo_out:
            i = 0
            for line in fo_in:
                i += 1
                # if i % 10000 == 0:
                #     sys.stdout.write(".")
                #     sys.stdout.flush()
                t = line.strip().split(" ")
                if len(t) != 3:
                    raise RuntimeError("Unexpected [%s]" % line)
                ts = float(t[0])
                if ts_base is None:
                    ts_base = ts
                # When ts wraps around
                if (ts_prev is not None) and (ts < ts_prev):
                    ts_base -= 4294
                ts1 = ts_lower + datetime.timedelta(seconds=(ts - ts_base))
                obj_id = t[2]
                op = t[1].replace("get", "G").replace("set", "S")
                # Skip timestamp not in the proper range. It happens due to the lack of
                # precision of the datetime in the file names.
                if (ts1 < ts_lower) or ((ts_upper is not None) and (ts_upper < ts1)):
                    num_skipped += 1
                    continue
                if ts != ts_prev:
                    obj_ids_w.clear()
                    obj_ids_r.clear()
                    if op == "S":
                        obj_ids_w.add(obj_id)
                    elif op == "G":
                        obj_ids_r.add(obj_id)
                    fo_out.write("%s %s %s\n" % (ts1.strftime("%y%m%d-%H%M%S.%f"), op, obj_id))
                    ts_prev = ts
                else:
                    if op == "S":
                        if obj_id not in obj_ids_w:
                            fo_out.write("%s %s %s\n" % (ts1.strftime("%y%m%d-%H%M%S.%f"), op, obj_id))
                            obj_ids_w.add(obj_id)
                    elif op == "G":
                        if obj_id not in obj_ids_r:
                            fo_out.write("%s %s %s\n" % (ts1.strftime("%y%m%d-%H%M%S.%f"), op, obj_id))
                            obj_ids_r.add(obj_id)
        Cons.P("Created %s %d in %.0f ms.%s" % \
            (fn_out, os.path.getsize(fn_out), (time.time() - start_time) * 1000.0, \
             ((" Skipped %d due to out-of-order ts range" % num_skipped) if num_skipped > 0 else "")))
    except Exception as e:
        Cons.P("Error while processing %s\n%s\n%s\n" % (fn_pair[0], e, traceback.format_exc()))
__required_arguments = [
    CommandLineArgUtil.ORACLE_HOME_SWITCH,
    CommandLineArgUtil.MODEL_FILE_SWITCH,
    CommandLineArgUtil.OUTPUT_DIR_SWITCH,
    CommandLineArgUtil.TARGET_SWITCH
]

__optional_arguments = [
    CommandLineArgUtil.VARIABLE_FILE_SWITCH
]

all_changes = []
all_added = []
all_removed = []
compare_msgs = sets.Set()


def __process_args(args):
    """
    Process the command-line arguments.
    :param args: the command-line arguments list
    :raises CLAException: if an error occurs while validating and processing the command-line arguments
    """
    _method_name = '__process_args'

    cla_util = CommandLineArgUtil(_program_name, __required_arguments, __optional_arguments)
    cla_util.set_allow_multiple_models(True)
    argument_map = cla_util.process_args(args)

    target_configuration_helper.process_target_arguments(argument_map)
def __init__(self, keep):
    self.keep = sets.Set(map(ord, keep))
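# Illustrative usage sketch (the enclosing class is not shown in this snippet;
# `Keep` below is a hypothetical name for it). Storing ord() values makes
# membership tests work on character codes:
#     k = Keep('abc')
#     [c for c in 'abcd' if ord(c) in k.keep]   # -> ['a', 'b', 'c']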
def receiveClose(self):
    return sets.Set(self.list), None
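# Illustrative note (an assumption based on the banana unslicer protocol, not
# stated in this snippet): receiveClose turns the accumulated self.list into a
# Set once the container is fully received; the second element of the returned
# tuple is a "ready" deferred, with None meaning the object is complete now.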
def createBagOfWords(self):
    keywords = self.text.split(' ')
    # Drop URLs. Filtering into a new list avoids the skipped-element bug of
    # removing items from a list while iterating over it.
    keywords = [keyword for keyword in keywords if not keyword.startswith('https://')]
    self.bagOfWords = sets.Set(keywords)
def write_taxonomy_and_seqinfo_files(self, taxonomies, output_taxonomy_file, output_seqinfo_file):
    '''Write out taxonomy and seqinfo files as required by taxtastic from known taxonomies

    Parameters
    ----------
    taxonomies: hash of taxon_id to array of taxonomic information
    output_taxonomy_file: write taxtastic-compatible 'taxonomy' file here
    output_seqinfo_file: write taxtastic-compatible 'seqinfo' file here'''
    first_pass_id_and_taxonomies = []
    tc = TaxonomyCleaner()
    max_number_of_ranks = 0
    for taxon_id, tax_split in taxonomies.iteritems():
        # Replace spaces with underscores e.g. 'Candidatus my_genus'
        for idx, item in enumerate(tax_split):
            tax_split[idx] = re.sub('\s+', '_', item.strip())
        # Remove 'empty' taxononomies e.g. 's__'
        tax_split = tc.remove_empty_ranks(tax_split)
        # Add this fixed up list to the list
        first_pass_id_and_taxonomies.append([taxon_id] + tax_split)
        if len(tax_split) > max_number_of_ranks:
            max_number_of_ranks = len(tax_split)

    # Find taxons that have multiple parents, building a hash of parents as we go
    # (i.e. a tree of taxonomies embedded in a hash)
    #
    # Assumes that no two taxonomic labels are the same when they are from different
    # taxonomic levels. When there are children with multiple parents at the
    # same taxonomic label then these are warned about and worked around.
    parents = {}  # hash of taxon to immediate parent
    known_duplicates = sets.Set([])
    for j, array in enumerate(first_pass_id_and_taxonomies):
        taxonomy = array[1:]
        for i, tax in enumerate(taxonomy):
            if i == 0:
                continue  # top levels don't have parents
            ancestry = taxonomy[i - 1]
            if parents.has_key(tax):
                if parents[tax] != ancestry:
                    dup = "%s%s" % (parents[tax], ancestry)
                    # don't report the same problem several times
                    if dup not in known_duplicates:
                        print " %s '%s' with multiple parents %s and %s" % (array[0], tax, parents[tax], ancestry)
                        known_duplicates.add(dup)
                    # fix the current one
                    new_name_id = 1
                    new_name = "%se%s" % (tax, new_name_id)
                    while parents.has_key(new_name) and parents[new_name] != ancestry:
                        new_name_id += 1
                        new_name = "%se%s" % (tax, new_name_id)
                    first_pass_id_and_taxonomies[j][i + 1] = new_name
                    taxonomy[i] = new_name
                    parents[new_name] = ancestry
            else:
                # normal case, seeing a new taxon and parent for the first time
                parents[tax] = ancestry

    # Write the sequence file
    with open(output_seqinfo_file, 'w') as seqout:
        # write header
        seqout.write('seqname,tax_id\n')
        # write each taxonomic association
        for array in first_pass_id_and_taxonomies:
            sequence_id = array[0]
            if len(array) == 1:
                most_specific_taxonomic_affiliation = 'Root'
            else:
                most_specific_taxonomic_affiliation = array[-1]
            seqout.write("%s,%s\n" % (array[0], most_specific_taxonomic_affiliation))

    # Write the taxonomy file
    noted_taxonomies = sets.Set([])
    taxonomic_level_names = ["rank_%i" % rank for rank in range(max_number_of_ranks)]
    with open(output_taxonomy_file, 'w') as seqout:
        # write header and root line
        seqout.write(','.join(['tax_id', 'parent_id', 'rank', 'tax_name', 'root']
                              + taxonomic_level_names) + '\n')
        seqout.write(','.join(['Root', 'Root', 'root', 'Root', 'Root'])
                     + ''.join([','] * max_number_of_ranks) + '\n')
        # write all the taxonomies
        for array in first_pass_id_and_taxonomies:
            taxons = array[1:]
            for i, tax in enumerate(taxons):
                line = self._taxonomy_line(i, taxons[:(i + 1)], taxonomic_level_names)
                if line not in noted_taxonomies:
                    seqout.write(line + "\n")
                    noted_taxonomies.add(line)
def getChanges(self, opts):
    """Generate and stash a list of Change dictionaries, ready to be sent to
    the buildmaster's PBChangeSource."""

    # first we extract information about the files that were changed
    repo = opts['repository']
    slave_repo = opts['slave-repo'] or repo
    print "Repo:", repo
    rev_arg = ''
    if opts['revision']:
        rev_arg = '-r %s' % (opts['revision'], )
    changed = commands.getoutput('svnlook changed %s "%s"' % (rev_arg, repo)).split('\n')
    # the first 4 columns can contain status information
    changed = [x[4:] for x in changed]
    message = commands.getoutput('svnlook log %s "%s"' % (rev_arg, repo))
    who = commands.getoutput('svnlook author %s "%s"' % (rev_arg, repo))
    revision = opts.get('revision')
    if revision is not None:
        revision = str(int(revision))

    # see if we even need to notify buildbot by looking at filters first
    changestring = '\n'.join(changed)
    fltpat = opts['includes']
    if fltpat:
        included = sets.Set(re.findall(fltpat, changestring))
    else:
        included = sets.Set(changed)
    expat = opts['excludes']
    if expat:
        excluded = sets.Set(re.findall(expat, changestring))
    else:
        excluded = sets.Set([])
    if len(included.difference(excluded)) == 0:
        print changestring
        print """\
Buildbot was not interested, no changes matched any of these filters:\n %s
or all the changes matched these exclusions:\n %s\
""" % (fltpat, expat)
        sys.exit(0)

    # now see which branches are involved
    files_per_branch = {}
    for f in changed:
        branch, filename = split_file(f)
        if branch in files_per_branch.keys():
            files_per_branch[branch].append(filename)
        else:
            files_per_branch[branch] = [filename]

    # now create the Change dictionaries
    changes = []
    encoding = opts['encoding']
    for branch in files_per_branch.keys():
        d = {
            'who': unicode(who, encoding=encoding),
            'repository': unicode(slave_repo, encoding=encoding),
            'comments': unicode(message, encoding=encoding),
            'revision': revision,
            'project': unicode(opts['project'] or "", encoding=encoding),
            'src': 'svn',
        }
        if branch:
            d['branch'] = unicode(branch, encoding=encoding)
        else:
            d['branch'] = branch
        files = []
        for file in files_per_branch[branch]:
            files.append(unicode(file, encoding=encoding))
        d['files'] = files
        changes.append(d)

    return changes
def osg_size(sql_results, globals=globals(), **kw):
    """
    Calculate the OSG's size in terms of utilized CPUs, accessible CPUs,
    and total CPUs.
    """
    if 'normalize' in kw and kw['normalize'].lower().find('t') >= 0:
        normalize = True
    else:
        normalize = False
    utilized_results, md = results_parser(sql_results, globals=globals, **kw)
    accessible_results, _ = globals['GratiaBarQueries'].osg_avail_size(
        span=7 * 86400, starttime=time.time() - 7 * 86400 * 52)
    total_results, _ = globals['GIPQueries'].gip_site_size(
        span=7 * 86400, starttime=time.time() - 7 * 86400 * 52, max_size=20000)
    ksi2k_results, _ = globals['GIPQueries'].subcluster_score_ts()
    ksi2k_results2, _ = globals['GIPQueries'].subcluster_score_ts2()
    ksi2k_results2 = ksi2k_results2['Nebraska']
    sites = utilized_results.keys()
    new_results = {}
    all_intervals = sets.Set()
    for site in sites:
        intervals = utilized_results[site].keys()
        all_intervals.union_update(intervals)
    all_intervals = list(all_intervals)
    all_intervals.sort()
    total_utilized_results = {}
    total_accessible_results = {}
    total_total_results = {}
    # USED, ACCESSIBLE, and UNACCESSIBLE are module-level constants holding
    # these pivot labels.
    final_results = {
        'Used': {},
        'Accessible, but not Used': {},
        'In OSG, but not Accessible': {}
    }
    may_1 = time.mktime((2008, 05, 01, 0, 0, 0, 0, 0, 0))
    avg_ksi2k_results = {}
    ksi2k_min = min(1.7, ksi2k_results2.values())
    ksi2k_max = ksi2k_min
    for interval in all_intervals:
        ksi2k_max = max(ksi2k_results2.get(interval, ksi2k_min), ksi2k_max)
        avg_ksi2k_results[interval] = ksi2k_max
    prev_interval = 0
    for interval in all_intervals:
        cumulative = 0
        for site, vals in utilized_results.items():
            if site not in ksi2k_results:
                ksi2k = avg_ksi2k_results[interval]
            elif interval not in ksi2k_results[site]:
                ksi2k = min(min(ksi2k_results[site].values()), avg_ksi2k_results[interval])
            else:
                ksi2k = ksi2k_results[site][interval]
            if normalize:
                cumulative += vals.get(interval, 0) * ksi2k
            else:
                cumulative += vals.get(interval, 0)
        total_utilized_results[interval] = cumulative
        cumulative2 = 0
        for site, vals in accessible_results.items():
            if site not in ksi2k_results:
                ksi2k = avg_ksi2k_results[interval]
            elif interval not in ksi2k_results[site]:
                ksi2k = min(min(ksi2k_results[site].values()), avg_ksi2k_results[interval])
            else:
                ksi2k = ksi2k_results[site][interval]
            if normalize:
                cumulative2 += vals.get(interval, 0) * ksi2k
            else:
                cumulative2 += vals.get(interval, 0)
        total_accessible_results[interval] = cumulative2
        cumulative3 = 0
        for site, vals in total_results.items():
            if site not in ksi2k_results:
                ksi2k = avg_ksi2k_results[interval]
            elif interval not in ksi2k_results[site]:
                ksi2k = min(min(ksi2k_results[site].values()), avg_ksi2k_results[interval])
            else:
                ksi2k = ksi2k_results[site][interval]
            if normalize:
                cumulative3 += vals.get(interval, 0) * ksi2k
            else:
                cumulative3 += vals.get(interval, 0)
        total_total_results[interval] = cumulative3
        if interval < may_1:
            continue
        final_results[USED][interval] = cumulative
        final_results[ACCESSIBLE][interval] = max(cumulative2 - cumulative, 0)
        final_results[UNACCESSIBLE][interval] = max(cumulative3 - cumulative2, 0)
        # Make sure numbers never go down.
        # This should be true because all the numbers should be cumulative,
        # but we're just being paranoid here.
        # for pivot in [ACCESSIBLE, UNACCESSIBLE]:
        #     if prev_interval in final_results[pivot] and final_results[pivot]\
        #             [prev_interval] > final_results[pivot][interval]:
        #         final_results[pivot][interval] = final_results[pivot][prev_interval]
        # prev_interval = interval
    return final_results, md
def test_list_mixin(list_class=TestList, rand_elem=None):
    """
    Unit test for ListMixin.
    """
    if list_class is TestList:
        L1 = TestList()
        L2 = TestList()
        L3 = TestList()
        L1.extend([L1, L2, L3])
        L2.append(L3)
        L3.append(L1)
        assert (repr(L1) == 'TestList([TestList(...), TestList([TestList' +
                '([TestList(...)])]), TestList([TestList(...)])])')
        L1 = TestList()
        L1.append(L1)
        L2 = copy.deepcopy(L1)
        assert id(L1) == id(L1[0])
        assert id(L2) == id(L2[0])
        L3 = copy.copy(L2)
        assert id(L3) != id(L3[0])
        assert id(L3[0]) == id(L2[0]) == id(L2)

    if rand_elem is None:
        def rand_elem():
            return random.randrange(50)

    def get_or_none(obj, getindex):
        try:
            return obj[getindex]
        except IndexError:
            return None

    def set_or_none(obj, setindex, setvalue):
        try:
            obj[setindex] = setvalue
            return setvalue
        except IndexError:
            return None

    def extended_set_or_none(obj, setindex, setvalue):
        try:
            obj[setindex] = setvalue
            return setvalue
        except ValueError:
            return None

    def insert_or_none(obj, insertindex, insertvalue):
        try:
            obj.insert(insertindex, insertvalue)
        except IndexError:
            return None

    def pop_or_none(obj, *popargs):
        try:
            obj.pop(*popargs)
        except IndexError:
            return None

    def remove_or_none(obj, removevalue):
        try:
            obj.remove(removevalue)
        except IndexError:
            return None

    import random
    import sets
    import sys

    for i in [1, 3, 8, 16, 18, 29, 59, 111, 213, 501, 1013, 2021, 3122, 4039, 5054]:
        x = [rand_elem() for j in range(i)]
        y = list_class(x)
        for j in range(100):
            r = random.randrange(13)
            if r == 0:
                # Set element at a random index
                if len(x) != 0:
                    k = random.randrange(-2 * len(x), 2 * len(x))
                    v = rand_elem()
                    assert set_or_none(x, k, v) == set_or_none(y, k, v)
            elif r == 1:
                # Delete element at random index
                if len(x) != 0:
                    k = random.randrange(len(x))
                    del x[k]
                    del y[k]
            elif r == 2:
                # In place add some elements
                addelems = [rand_elem() for inx in range(random.randrange(4))]
                x += addelems
                y += addelems
            elif r == 3:
                # In place add an element
                addelem = rand_elem()
                x.append(addelem)
                y.append(addelem)
            elif r == 4:
                # In place add some elements
                addelems = [rand_elem() for inx in range(random.randrange(4))]
                x += addelems
                y += addelems
            elif r == 5:
                # Insert an element
                addelem = rand_elem()
                insertidx = random.randrange(-2 * len(x), 2 * len(x) + 1)
                assert insert_or_none(x, insertidx, addelem) == \
                       insert_or_none(y, insertidx, addelem)
            elif r == 6:
                # Pop an element
                popidx = random.randrange(-2 * len(x), 2 * len(x) + 1)
                assert pop_or_none(x, popidx) == pop_or_none(y, popidx)
            elif r == 7:
                # Pop last element
                assert pop_or_none(x) == pop_or_none(y)
            elif r == 8:
                # Remove an element
                if len(x) != 0:
                    remvalue = random.choice(x)
                    assert remove_or_none(x, remvalue) == remove_or_none(y, remvalue)
            elif r == 9:
                if random.randrange(5) == 0:
                    # Sort
                    if sys.version_info[:3] >= (2, 4, 0):
                        r2 = random.randrange(6)
                    else:
                        r2 = random.randrange(2)

                    def keyfunc(keyitem):
                        return (keyitem - 5)**2

                    def cmpfunc(a, b):
                        return cmp((a + 9)**2, (b - 5)**3)

                    if r2 == 0:
                        x.sort()
                        y.sort()
                    elif r2 == 1:
                        x.sort(cmpfunc)
                        y.sort(cmpfunc)
                    elif r2 == 2:
                        x.sort(cmpfunc, keyfunc)
                        y.sort(cmpfunc, keyfunc)
                    elif r2 == 3:
                        x.sort(cmpfunc, keyfunc, True)
                        y.sort(cmpfunc, keyfunc, True)
                    elif r2 == 4:
                        x.sort(cmpfunc, reverse=True)
                        y.sort(cmpfunc, reverse=True)
                    elif r2 == 5:
                        x.sort(None, keyfunc, True)
                        y.sort(None, keyfunc, True)
            elif r == 10:
                # Remove a slice
                start = random.randrange(-2 * len(x), 2 * len(x) + 1)
                end = random.randrange(-2 * len(x), 2 * len(x) + 1)
                step = random.randrange(-2 * len(x), 2 * len(x) + 1)
                r2 = random.randrange(3)
                if r2 == 0:
                    step = random.randrange(-5, 5)
                elif r2 == 1:
                    step = 1
                if step == 0:
                    step = 1
                del x[start:end:step]
                del y[start:end:step]
            elif r == 11:
                # Assign to a regular slice
                start = random.randrange(-2 * len(x), 2 * len(x) + 1)
                end = random.randrange(-2 * len(x), 2 * len(x) + 1)
                assignval = [rand_elem() for assignidx in range(random.randrange(10))]
                x[start:end] = assignval
                y[start:end] = assignval
            elif r == 12:
                # Assign to an extended slice
                start = random.randrange(-2 * len(x), 2 * len(x) + 1)
                end = random.randrange(-2 * len(x), 2 * len(x) + 1)
                step = random.randrange(-2 * len(x), 2 * len(x) + 1)
                r2 = random.randrange(3)
                if r2 == 0:
                    step = random.randrange(-5, 5)
                elif r2 == 1:
                    step = 1
                if step == 0:
                    step = 1
                if step == 1:
                    step = 2
                indices = range(*slice(start, end, step).indices(len(x)))
                assignval = [rand_elem() for assignidx in indices]
                if random.randrange(2) == 0:
                    # Make right hand value have incorrect length
                    if random.randrange(2) == 0 and len(assignval) > 0:
                        if random.randrange(2) == 0:
                            assignval.pop()
                        else:
                            assignval = []
                    else:
                        assignval.append(1)
                assert (extended_set_or_none(x, slice(start, end, step), assignval) ==
                        extended_set_or_none(y, slice(start, end, step), assignval))

            # Check that x == y in a variety of ways.
            if len(x) != 0:
                for i4 in range(20):
                    i3 = random.randrange(-2 * len(x), 2 * len(x))
                    assert get_or_none(x, i3) == get_or_none(y, i3)
            assert isinstance(y[:], list_class)
            assert isinstance(y[:1], list_class)
            assert list(iter(x)) == list(iter(y))
            assert list(iter(iter(x))) == list(iter(iter(y)))
            assert str(x) == str(y)
            assert x + [1, 2] == y + [1, 2]
            assert x * 0 == y * 0
            assert x * -1 == y * -1
            assert x * -5000 == y * -5000
            assert x * 1 == y * 1
            assert x * 2 == y * 2
            assert isinstance(x + y, list)
            assert x + y == x + list(y)
            elems = sets.Set(x)
            elems2 = sets.Set(y)
            assert elems == elems2

            def index_or_none(obj, search, *args):
                try:
                    return obj.index(search, *args)
                except ValueError:
                    return None

            for key in elems:
                assert x.count(key) == y.count(key)
                assert index_or_none(x, key) == index_or_none(y, key)
                i1 = random.randrange(-len(x) - 2, len(x) + 2)
                i2 = random.randrange(-len(x) - 2, len(x) + 2)
                assert index_or_none(x, key, i1, i2) == \
                       index_or_none(y, key, i1, i2)
            assert x[:] == y[:]

            # Get slices
            for sliceidx in range(10):
                if len(x) != 0:
                    start = random.randrange(-2 * len(x), 2 * len(x) + 1)
                    end = random.randrange(-2 * len(x), 2 * len(x) + 1)
                    step = random.randrange(-2 * len(x), 2 * len(x))
                    r2 = random.randrange(3)
                    if r2 == 0:
                        step = random.randrange(-5, 5)
                    elif r2 == 1:
                        step = 1
                    if step == 0:
                        step = 1
                    assert x[start:end:step] == y[start:end:step]
            assert cmp(x, y) == 0
            assert len(x) == len(y)
            assert x == y
            x.reverse()
            y.reverse()
            assert x == y
            x.reverse()
            y.reverse()
            for k in range(len(x)):
                assert x[k] == y[k]
def osg_site_size(sql_results, globals=globals(), **kw):
    """
    Calculate the OSG's size in terms of utilized CPUs, accessible CPUs,
    and total CPUs.  Break down these statistics by site.
    """
    USED = 'Max Used'
    UNACCESSIBLE = 'In OSG, but never used'
    if 'normalize' in kw and kw['normalize'].lower().find('t') >= 0:
        normalize = True
    else:
        normalize = False
    utilized_results, md = results_parser(sql_results, globals=globals, **kw)
    accessible_results, _ = globals['GratiaBarQueries'].osg_avail_size(
        span=7 * 86400, starttime=time.time() - 7 * 86400 * 52)
    total_results, _ = globals['GIPQueries'].gip_site_size(
        span=7 * 86400, starttime=time.time() - 7 * 86400 * 52, max_size=20000)
    ksi2k_results, _ = globals['GIPQueries'].subcluster_score_ts()
    ksi2k_results2, _ = globals['GIPQueries'].subcluster_score_ts2()
    ksi2k_results2 = ksi2k_results2['Nebraska']
    sites = utilized_results.keys()
    new_results = {}
    all_intervals = sets.Set()
    for site in sites:
        intervals = utilized_results[site].keys()
        all_intervals.union_update(intervals)
    all_intervals = list(all_intervals)
    all_intervals.sort()
    total_utilized_results = {}
    total_accessible_results = {}
    total_total_results = {}
    final_results = {USED: {}, ACCESSIBLE: {}, UNACCESSIBLE: {}}
    may_1 = time.mktime((2008, 05, 01, 0, 0, 0, 0, 0, 0))
    avg_ksi2k_results = {}
    ksi2k_min = min(1.7, ksi2k_results2.values())
    ksi2k_max = ksi2k_min
    for interval in all_intervals:
        ksi2k_max = max(ksi2k_results2.get(interval, ksi2k_min), ksi2k_max)
        avg_ksi2k_results[interval] = ksi2k_max
    prev_interval = 0
    for interval in all_intervals:
        # Process accessible numbers
        current_acc = 0
        for site, vals in accessible_results.items():
            if site not in ksi2k_results:
                ksi2k = avg_ksi2k_results[interval]
            elif interval not in ksi2k_results[site]:
                ksi2k = min(min(ksi2k_results[site].values()), avg_ksi2k_results[interval])
            else:
                ksi2k = ksi2k_results[site][interval]
            if normalize:
                current_acc = vals.get(interval, 0) * ksi2k
            else:
                current_acc = vals.get(interval, 0)
            prev_acc = total_accessible_results.setdefault(site, 0)
            total_accessible_results[site] = max(prev_acc, current_acc)
        # Process total size numbers
        cumulative3 = 0
        for site, vals in total_results.items():
            if site not in ksi2k_results:
                ksi2k = avg_ksi2k_results[interval]
            elif interval not in ksi2k_results[site]:
                ksi2k = min(min(ksi2k_results[site].values()), avg_ksi2k_results[interval])
            else:
                ksi2k = ksi2k_results[site][interval]
            if normalize:
                curr_total = vals.get(interval, 0) * ksi2k
            else:
                curr_total = vals.get(interval, 0)
            prev_total = total_total_results.setdefault(site, 0)
            total_total_results[site] = max(prev_total, curr_total)
        if interval < may_1:
            continue
        for site in sites:
            # Update the final results
            final_results[USED][site] = total_accessible_results.get(site, 0)
            final_results[UNACCESSIBLE][site] = max(
                total_total_results.get(site, 0) - total_accessible_results.get(site, 0), 0)
    return final_results, md
def main():
    species_busco_genes = {}
    species_superalignment = {}

    # initialize species_busco_genes with species BUSCO hits
    for arg in args:
        print arg
        convert_phylip(arg, string.replace(arg, '.best.phy', ''))
        species_alignment = parse_phylip(string.replace(arg, '.best.phy', ''))

        for species in species_alignment.keys():
            species_information = string.split(species, '_')
            species_busco_genes[species_information[0]] = []

    # for every phylip file, convert to relaxed Phylip format
    # run QKphylogeny_alignment_analysis.py
    for arg in args:
        print 'superset:\t', arg, strftime("%Y-%m-%d %H:%M:%S", gmtime())
        process_name = "python QKphylogeny_alignment_analysis.py -q -a %s -d %s -o %s" % (
            string.replace(arg, '.best.phy', ''), options.depth,
            string.replace(arg, '.phy.best.phy', '.e.phy'))
        process = subprocess.Popen(process_name, shell=True)
        process.wait()

        # evaluate the super set of genes (for all species)
        species_alignment = parse_phylip(
            string.replace(arg, '.phy.best.phy', '.e.phy'))

        for species in species_alignment.keys():
            species_information = string.split(species, '_')
            species_busco_genes[species_information[0]].append(
                string.replace(arg, '.PRANK.phy.best.phy', ''))

    # concatenate all species, evaluate amount of missing data for every species
    print species_busco_genes.keys()

    for species in species_busco_genes.keys():
        species_superalignment[species] = ''

    for arg in args:
        print 'merge species:\t', arg, strftime("%Y-%m-%d %H:%M:%S", gmtime())

        # read multiple sequence alignment
        species_alignment = parse_phylip(
            string.replace(arg, '.phy.best.phy', '.e.phy'))

        # set alignment length (for missing data)
        alignment_length = len(species_alignment[species_alignment.keys()[0]])
        missing_template = '-' * alignment_length

        # add alignments for all species in alignment
        species_present = []

        for species in species_alignment.keys():
            species_information = string.split(species, '_')
            species_superalignment[
                species_information[0]] += species_alignment[species]
            species_present.append(species_information[0])

        # add missing data for all other species not in the alignment
        for species in list(
                sets.Set(species_superalignment.keys()) -
                sets.Set(species_present)):
            species_superalignment[species] += missing_template

    # print the degree of missing data for each species
    for species in species_superalignment.keys():
        print species, species_superalignment[species].count('-'), len(
            species_superalignment[species])

    # export concatenated alignment
    Fopen = open(options.superalignment, 'w')
    Fopen.write(
        ' ' + str(len(species_superalignment.keys())) + ' ' +
        str(len(species_superalignment[species_superalignment.keys()[0]])) +
        '\n')

    for species in species_superalignment.keys():
        # pad species names to a fixed-width 10-character field
        Fopen.write(species.ljust(10))
        Fopen.write(species_superalignment[species] + '\n')

    Fopen.close()

    return
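# Illustrative sketch (hypothetical data) of the relaxed PHYLIP layout that
# main() writes: a header line "<taxa> <alignment length>" followed by one
# row per species, with the name padded to a 10-character field.
example = {'HvulgareX': 'ATG-', 'Taestivum': 'ATGC'}
print ' %d %d' % (len(example), len(example.values()[0]))
for name, seq in example.items():
    print name.ljust(10) + seq
#  2 4
# HvulgareX ATG-
# Taestivum ATGC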
def getvar(site):
    names = []
    actions = []
    print "\n", "-" * 45
    print "[+] Searching:", site
    try:
        # The original passed `port` as urlopen()'s second argument, which is
        # the POST data parameter; the port belongs in the URL instead.
        webpage = urllib2.urlopen(proto + "://" + site + ":" + str(port)).read()
        # Harvested e-mail addresses (collected here, unused in this function).
        emails = re.findall('[\w\.\-]+@[\w\.\-]+\.\w\w\w', webpage)
        var = re.findall("\?[\w\.\-/]*\=", webpage)
        if len(var) >= 1:
            var = list(sets.Set(var))
        found_action = re.findall("action=\"[\w\.\-/]*\"", webpage.lower())
        found_action = list(sets.Set(found_action))
        if len(found_action) >= 1:
            for a in found_action:
                a = a.split('"', 2)[1]
                try:
                    if a[0] != "/":
                        a = "/" + a
                except IndexError:
                    pass
                actions.append(a)
        found_names = re.findall("name=\"[\w\.\-/]*\"", webpage.lower())
        found_names = list(sets.Set(found_names))
        for n in found_names:
            names.append(n.split('"', 2)[1])
        print "[+] Variables:", len(var), "| Actions:", len(
            actions), "| Fields:", len(names)
        # Estimated request count; the last term previously repeated
        # actions * names instead of covering the actions * var loop below.
        print "[+] Avg Requests:", (
            len(var) + len(names) + (len(actions) * len(names)) +
            (len(actions) * len(var))) * len(xss_payloads)
        if len(var) >= 1:
            for v in var:
                if site.count("/") >= 2:
                    # Retry each payload at every parent directory of the URL.
                    for x in xrange(site.count("/")):
                        for xss in xss_payloads:
                            tester(site.rsplit('/', x + 1)[0] + "/" + v + xss)
                for xss in xss_payloads:
                    tester(site + "/" + v + xss)
        if len(names) >= 1:
            for n in names:
                if site.count("/") >= 2:
                    for x in xrange(site.count("/")):
                        for xss in xss_payloads:
                            tester(
                                site.rsplit('/', x + 1)[0] + "/" + "?" + n +
                                "=" + xss)
                for xss in xss_payloads:
                    tester(site + "/" + "?" + n + "=" + xss)
        if len(actions) != 0 and len(names) >= 1:
            for a in actions:
                for n in names:
                    if site.count("/") >= 2:
                        for x in xrange(site.count("/")):
                            for xss in xss_payloads:
                                tester(
                                    site.rsplit('/', x + 1)[0] + a + "?" + n +
                                    "=" + xss)
                                #tester(site.split("/")[0]+a+"?"+n+"="+xss)
        if len(actions) != 0 and len(var) >= 1:
            for a in actions:
                for v in var:
                    if site.count("/") >= 2:
                        for x in xrange(site.count("/")):
                            for xss in xss_payloads:
                                tester(
                                    site.rsplit('/', x + 1)[0] + a + v + xss)
                    else:
                        for xss in xss_payloads:
                            tester(site.split("/")[0] + a + v + xss)
        if sys.argv[1].lower() == "-g" or sys.argv[1].lower() == "-google":
            urls.remove(site)
    except (socket.timeout, IOError, ValueError, socket.error,
            socket.gaierror):
        if sys.argv[1].lower() == "-g" or sys.argv[1].lower() == "-google":
            urls.remove(site)
    except KeyboardInterrupt:
        print "\n[-] Cancelled -", timer(), "\n"
        sys.exit(1)
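# Sketch of the path-walking trick used by the tester loops above:
# rsplit('/', k + 1)[0] strips the last k + 1 path components, so each
# payload is retried against every parent directory of the URL.
# (Illustrative URL and payload; tester() itself is defined elsewhere.)
demo_site = 'example.com/a/b/page.php'
for k in xrange(demo_site.count('/')):
    print demo_site.rsplit('/', k + 1)[0] + '/?q=<payload>'
# example.com/a/b/?q=<payload>
# example.com/a/?q=<payload>
# example.com/?q=<payload>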