Example #1
def test_anhadir_id_lista():
    lista_test = ["id_1", "id_1", "id_2", "id_3"]
    with pytest.raises(SystemExit) as pytest_error:
        Parse.anhadir_id_lista("id_1", lista_test)
    assert pytest_error.type == SystemExit
    Parse.anhadir_id_lista("id_4", lista_test)
    assert lista_test[len(lista_test) - 1] == "id_4"
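Note: Parse.anhadir_id_lista itself is not shown above; a minimal sketch consistent with these assertions (duplicate ids abort the program, new ids are appended) could look like the following. The error message is hypothetical.

import sys

def anhadir_id_lista(nuevo_id, lista_id):
    # Reject duplicate ids by exiting (pytest sees this as SystemExit),
    # otherwise append the new id in place.
    if nuevo_id in lista_id:
        print("Error: id duplicado: " + nuevo_id)
        sys.exit(1)
    lista_id.append(nuevo_id)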
Example #2
class main:
    try:
        if len(sys.argv) < 2:
            url = raw_input("Please enter URL: ")
            if ' ' in url:
                raise ValueError("InputError")
            parseObj = Parse(url)
        else:
            url = sys.argv[1]
            parseObj = Parse(url)
    except ValueError as e:
        print(e)
        sys.exit()

    title = parseObj.getTitle()
    keyword = parseObj.getKeyword()

    content = parseObj.getParsedContent()
    content = content[-20:]

    header = parseObj.getParsedHeader(title, keyword)
    header = header[-10:]

    analyzer = mergeBag(header, content, 3)
    analyzer = sortBag(analyzer)
    analyzer = list(analyzer[-8:])
    analyzer.reverse()

    print("\nWebpage: " + url)
    print('\nKeywords:')
    for w in analyzer:
        print(w[0])
    print('')
Example #3
def translate_directory(filename, root, output_file):
    input_file = open(root + "/" + filename, "r")
    for ln in input_file:
        line = ln.strip()
        if not Parse.ignore_line(line):  # check if we ignore line or not
            translated = Parse.parse_line(line, filename)
            output_file.write(translated)
    input_file.close()
Example #4
 def get_result(self, text):
     sents, res = [], []
     parsed_text = utils.call_stanfardnlp_parse(text)
     parsed_sentences = Parse.get_parsed_text(parsed_text.split('\n'))
     for parsed_sentence in parsed_sentences:
         root = Parse.encode_tree(parsed_sentence)
         sents.append(root.to_strings())
         temp = self.traverse_check(root)
         res.append(temp)
     return sents, res
Example #5
def translate_vm_file(file):
    input_file = open(file, "r")
    file_name = path.basename(input_file.name)[:FILE_NAME_LAST_INDEX]
    output_file = open(file[:FILE_NAME_LAST_INDEX] + ".asm", "w")
    for ln in input_file:
        line = ln.strip()
        if not Parse.ignore_line(line):  # check if we ignore line or not
            translated = Parse.parse_line(line, file_name)
            output_file.write(translated)
    input_file.close()
    output_file.close()
Example #6
def update_news(bot, job):
    """
    Update the news in the database.
    :param bot:
    :param job:
    :return: void
    """
    topics = Parse.parse_topics(URL)
    for t in topics:
        Parse.parse_one_doc_to_set_topic_time(t)
    topics = DB.remain_need_to_update_topics(topics)
    topics = Parse.parse_docs(topics)
    DB.update_DB(topics)
Example #7
def test_comprobar_existencia_dependencias():
    lista_id = ["foo", "id_1", "prueba"]
    tareaBien = Tarea.Tarea("root", "raiz", ["foo", "id_1"],
                            datetime.datetime(2021, 1, 1), 30, ["foo", "id_1"])
    tareaMal = Tarea.Tarea("root", "raiz", ["error", "nop"],
                           datetime.datetime(2021, 1, 1), 30, ["error", "nop"])
    try:
        Parse.comprobar_existencia_dependencias(tareaBien, lista_id)
    except SystemExit:
        pytest.fail()
    with pytest.raises(SystemExit) as pytest_error:
        Parse.comprobar_existencia_dependencias(tareaMal, lista_id)
    assert pytest_error.type == SystemExit
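Note: Parse.comprobar_existencia_dependencias is not shown; a minimal sketch consistent with the test (exit when a dependency id is unknown) might be the following, assuming the Tarea object exposes its dependency list as an attribute named dependencias (hypothetical):

import sys

def comprobar_existencia_dependencias(tarea, lista_id):
    # Every declared dependency must already be a known id; otherwise exit.
    for dependencia in tarea.dependencias:  # attribute name is an assumption
        if dependencia not in lista_id:
            print("Error: dependencia inexistente: " + dependencia)
            sys.exit(1)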
Example #8
def transfer_from_online_to_db():
    dl_count = 0
    files = get_files_to_dl()
    dl_start = datetime.datetime.now()
    for file in files:
        url, index = Parse.get_info_from_file_name(file)
        dl_file(url)
        data = Parse.get_data_from_txt_file(index)
        MySQL.insert_data_for_day(data)
        os.remove("data.txt")
        dl_count += 1
        print(
            f"downloaded {dl_count} at {(datetime.datetime.now() - dl_start) / dl_count} per dl"
        )
Example #9
def do_parse(infile):
    """ this is the main parsing module """

    preprocess(infile) #okay
    (sen, doc) = extract_features()

    java_classpath = 'Tools/grmm/class:Tools/grmm/lib/mallet-deps.jar:Tools/grmm/lib/grmm-deps.jar'    
    java_prog = 'edu.umass.cs.mallet.grmm.learning.AcrfForTestJoty'

    if sen == "yes":
        apply_sent_model("tmp_sen.feat", java_classpath, java_prog)
    if doc == "yes":
        apply_doc_model("tmp_doc.feat", java_classpath, java_prog)

    Parse.parse('parse_sen.rel', 'tmp_sen.prob', "tmp_sen.feat", "tmp_sen.dis", 'parse_doc.rel', 'tmp_doc.prob', "tmp_doc.feat", "tmp_doc.dis", "tmp.edu")
Example #11
def translate_directory(filename, root, output_file):
    """
    translate all vm files in a directory to one asm file.
    :param filename: name of file
    :param root: path
    :param output_file: translated vm to asm file
    :return: None
    """
    input_file = open(root + os.sep + filename, "r")
    for ln in input_file:
        line = ln.strip()
        if not Parse.ignore_line(line):  # check if we ignore line or not
            translated = Parse.parse_line(line, filename)
            output_file.write(translated)
    input_file.close()
Example #12
def test_cast_duracion():
    assert Parse.cast_duracion("20") == 20
    assert Parse.cast_duracion("20d") == 20
    assert Parse.cast_duracion("20w") == 140
    assert Parse.cast_duracion("20m") == 600
    assert Parse.cast_duracion("2y") == 730
    with pytest.raises(SystemExit) as pytest_error:
        Parse.cast_duracion("fooy")
    assert pytest_error.type == SystemExit
    with pytest.raises(SystemExit) as pytest_error:
        Parse.cast_duracion("y123")
    assert pytest_error.type == SystemExit
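Note: the conversion rules implied by these assertions are days by default, with 'd', 'w', 'm' and 'y' suffixes meaning days, weeks (7 days), months (30) and years (365). A minimal sketch of Parse.cast_duracion consistent with them (error message hypothetical):

import sys

def cast_duracion(duracion):
    # Plain integers are days; a trailing d/w/m/y scales the value.
    factores = {"d": 1, "w": 7, "m": 30, "y": 365}
    try:
        if duracion and duracion[-1] in factores:
            return int(duracion[:-1]) * factores[duracion[-1]]
        return int(duracion)
    except ValueError:
        print("Error: duracion no valida: " + duracion)
        sys.exit(1)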
Example #13
def main():

    print(colored("\n#####################################################",'green'))
    print(colored("     Welcome to Team-15 DataBase Management System      ",'green'))
    print(colored("#####################################################\n",'green'))
    userLoginSignUp()
    query = ""
    database=""
    queryProcessor=qp.QueryProcessor()
    while not query.lower() == "quit":
        query=input(constants.InputQuery)
        if "use" in query.lower():
            Parse.Parse.newDB=True
            db_raw=re.compile(r'use\s(.*)\s*',re.IGNORECASE).findall(query)
            database=db_raw[0]
            query=input()
        else:
            pass  # keep the current database
        query_type = Parse.Parse(database,query,queryProcessor)
        val = query_type.check_query()
        print("\n#####################################################")
        if val == -1:
            print(colored("Incorrect Query",'red'))
        elif val == 0:
            break
    
    
    print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~Thanks!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
Example #14
 def __init__(self):
     self.__stroage = Storeage.Stroage()
     self.__chrome_options = webdriver.ChromeOptions()
     # self.__chrome_options.add_argument('--proxy-server=%s' % PROXY)
     self.__chrome_options.add_experimental_option(
         "prefs", {'profile.default_content_setting_values.images': 2})
     self.__parse = Parse.Parse()
Example #15
def main(filename):
    global tokentuple
    Scan.init(filename)
    while True:
        raw_input("Press Enter")
        print
        (textline, indent, tokens) = Scan.readLine()
        tree, rest = Parse.parseCOMMANDLINE(tokens)
        print "tree =", tree
        print "what's left over:", rest
        prop2 = nnfOf(tree[1])
        print "- shifted inwards:"
        print prop2
        print
        prop3 = nfOf("or", "and", prop2)
        print "cnf:"
        print prop3
        print
        prop4 = flatten("or", "and", prop3)
        print "flattened cnf:"
        print prop4
        print
        prop5 = removeDuplicates(prop4)
        print "no duplicates:", prop5
        prop6 = removeOpposites(prop5)
        print "simplified cnf:"
        print prop6
        for clause in prop6:
            print clause
    Scan.quit()
Example #16
def insertAppend(C6, v, e) :
    """appends  e  to the end of array/list  v  in the heap.
       Performs the same actions as an insertAssign to an indexed array,
       but preserves more heap info, since the append does not produce
       any aliases within  v.

       params : C6;  v - a vtree;  e - an etree
    """
    sigma = C6["store"]
    heap = C6["heap"]
    vname = v[1]
    vold = Parse.makeOldVar(v)
    if lookupType(C6, vname) != "array" :
        error("cannot append to a non-list/array")
    else :
        loc = PE.peToTuple(sigma[vname])
        length = heap[loc][0]
        newlength = PE.add(length, PE.make(1))
        vector = heap[loc][1]

        # assign original to v_old:
        sigma[vold[1]] = sigma[vname]

        # make copy for the new value of  v:
        copy = {}
        for k in vector :
            copy[k] = vector[k]
        newloc = PE.make(PE.makeSym())
        rhs = PE.evall(C6, e)
        copy[ PE.peToTuple(length) ] = rhs
        sigma[vname] = newloc
        heap[ PE.peToTuple(newloc) ] = (newlength, copy)
Example #17
File: CNF.py Project: mmccarty/nell
def goSequent() :
    """goSequent helps a user interactively type a sequent to be proved,
       formats it in cnf (where the goal prop is negated),
       and writes it as a string to a textfile.

       The string has the format,   D1 D2 ... Dn,
       where     D ::=  [F1, F2, ... Fm]
                 F ::=  "p"  |  "-p"
                         where  p  is a string of letters
       Example: the input,    p->r, q->r |- (p | q) -> r,
       is mapped to the cnf form,
               [['-p', 'r'], ['-q', 'r'], ['p', 'q'], ['-r']]
       and the string,
               [-p,r][-q,r][p,q][-r]
       is written to the output file that the user requests.
    """
    import Parse
    premises = True
    answer = []
    while premises:
        text = raw_input("Type premise (or |-): ").strip()
        if text == "|-" :
            premises = False
        else :
            prop = cnf( Parse.parse(Parse.scan(text)) )
            answer = answer + prop
    text = raw_input("Type goal prop: ")
    not_text = "-(" + text + ")"
    not_goal =  cnf( Parse.parse(Parse.scan(not_text)) )
    answer = answer + not_goal

    print "clauses are:", answer
    print
    filename = raw_input("Type name of destination file: ")
    output = open(filename, "w")
    textline = ""
    for clause in answer :
        textline = textline + "["
        items = ""
        for literal in clause :
            items = items + "," + literal
        if items != "" :
            items = items[1:]   # forget leading comma
        textline = textline + items + "]"
    print "wrote to", filename + ":", textline
    output.write(textline)
    output.close()
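For reference, the clause formatting in the loop above maps the docstring's example clauses straight to the documented string form; a standalone check:

clauses = [['-p', 'r'], ['-q', 'r'], ['p', 'q'], ['-r']]
textline = ""
for clause in clauses:
    textline = textline + "[" + ",".join(clause) + "]"
assert textline == "[-p,r][-q,r][p,q][-r]"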
Example #18
def main():
    in_put = "Input.txt"
    out_put = "Output.txt"
    with open(in_put, "r") as file:
        str = file.read()
        result = Parse.process(str)
    with open(out_put, "w") as file:
        file.write(result)
Example #19
def translate_vm_file(file):
    """
    translate only one vm file to an asm file.
    :param file: name of file
    :return: None
    """
    input_file = open(file, "r")
    file_name = path.basename(input_file.name)[:FILE_NAME_LAST_INDEX]
    output_file = open(file[:FILE_NAME_LAST_INDEX] + ".asm", "w")
    output_file.write(Translator.write_init())
    for ln in input_file:
        line = ln.strip()
        if not Parse.ignore_line(line):  # check if we ignore line or not
            translated = Parse.parse_line(line, file_name)
            output_file.write(translated)
    input_file.close()
    output_file.close()
Example #20
	def execute(self, edit, file):
		Norme.header(file.header)
		Norme.includes(file.includes)
		if len(file.functions) > 5:
			file.errors.append(Parse.Error("NBFUNCS", "", "FILE"))
		for function in file.functions:
			Norme.function(function)
		self.show_errors(file)
Example #21
def search(posting_path, query, stemmer, query_source_path, list_of_language,
           list_of_city, semantic):
    Parse.set_stop_words_file(posting_path + "/stop_words.txt")
    list_save_queries = Parse.parse_queries(query_source_path, posting_path,
                                            query, stemmer, semantic)
    res = {}
    for query_post in list_save_queries:
        fileName = posting_path + "/" + query_post + ".pkl"
        file = open(fileName, "rb+")
        querie_term_dictionary = pickle.load(file)
        file.close()
        os.remove(fileName)
        query_name = query_post.replace('post', "")
        res[query_name] = Ranker.rank(posting_path, stemmer,
                                      querie_term_dictionary, list_of_language,
                                      list_of_city)
    return res
Example #22
def translate_file(file):
    """
    Translate file to hack binary code
    :param file: file to translate to Hack binary code
    :return: None
    """
    input_file = open(file, "r")
    # change extension to .hack
    output_file = open(file[:FILE_NAME_LAST_INDEX] + ".hack", "w")
    first_pass(input_file)  # first pass
    input_file.seek(0)
    for ln in input_file:  # second pass
        line = ln.strip()
        if not Parse.ignore_line(line) and not line.startswith("("):
            in_binary = Parse.parse_line(line)
            output_file.write(in_binary + '\n')
    input_file.close()
    output_file.close()
Example #23
def matchDef(C6, btree) :
    """attempts to locate a defn saved in  C6["defs"] that matches
       the assert,  btree.  Returns whether or not there was success.
    """
    for scheme in C6["defs"] :
        success = Parse.match({}, btree, scheme)
        if success :  return True
    # else, no match:
    return False
Example #24
def insertAssign(C6, v, etree):
    """updates the store of C6  with an assignment. 
       If v already exists in C6's store, saves former value as  v_old
       for later use in proof reasoning.

       params: v - has form,  ["var", s]  or  ["index", ["var", s], etree]
               etree - another etree, to be assigned to the var.
    """
    sigma = C6["store"]
    heap = C6["heap"]
    badvars = C6["novars"]
    if v[0] == "var" : vtree = v
    elif v[0] == "index" : vtree = v[1]
    vold = Parse.makeOldVar(vtree)  # ["var", vname_old]

    # first, check if we are allowed to update  v:
    if (vtree in badvars) :
        error("you may not update a protected global var outside of its maintenance function")
        return

    # if possible, rename current value of var v  as  v_old:

    if v[0] == "var" and v[1] in sigma : # and lookupType(C6, v[1]) != "array":
        sigma[vold[1]] = sigma[v[1]]  # assign v's current value to v_old
    elif v[0] == "index" and lookupType(C6, v[1][1]) == "array":
        vname = v[1][1]
        loc = PE.peToTuple(sigma[vname])
        length = heap[loc][0]
        vector = heap[loc][1]
        # make copy:
        copy = {}
        for k in vector :
            copy[k] = vector[k]
        # assign original to v_old and copy to v :
        sigma[vold[1]] = sigma[vname]
        newloc = PE.make(PE.makeSym())
        sigma[vname] = newloc
        heap[ PE.peToTuple(newloc) ] = (length, copy)

    # (later,  vold  will be erased from  sigma....)
    # now, eval assignment's  rhs  and store it into  v:
    rhs = PE.evall(C6, etree)

    if v[0] == "var":  # simple var
            sigma[v[1]] = rhs
    elif v[0] == "index":   # an array/list reference
        # eval  index  expression (NOTE: no nested indexing allowed):
        indexpe = PE.evall(C6, v[2])
        # save values in sigma[vname][1] provably distinct from  vname[index]:
        vname = v[1][1]
        if vname not in sigma or lookupType(C6, vname) != "array" :
            error(vname + " is not an array in the store")
            #sigma[vname] = PE.makeArray()
        else :
            vmap = heap[PE.peToTuple(sigma[vname])][1]
            saveDistinctElements(C6, vmap, indexpe)
            vmap[PE.peToTuple(indexpe)] = rhs
Example #25
def num_valid_valuations(formula):
    formula_tree = Parse.parse(formula)
    num_valid = 0

    for v in all_valuations(atoms(formula_tree)):
        if eval(formula_tree, v):
            num_valid += 1

    return num_valid
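The helpers atoms and all_valuations are not shown here. atoms depends on the parse-tree representation, but all_valuations only needs the atom names; a minimal sketch:

import itertools

def all_valuations(atom_names):
    # Yield one dict per assignment of True/False to the given atoms.
    atom_names = sorted(atom_names)
    for values in itertools.product([False, True], repeat=len(atom_names)):
        yield dict(zip(atom_names, values))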
Example #26
    def test_eval_complex(self):
        self.assertFalse(Eval.eval(Parse.parse("a /\\ ~(a <=> a)"), {"a": False}))
        self.assertFalse(Eval.eval(Parse.parse("a /\\ ~(a <=> a)"), {"a": True}))

        self.assertTrue(Eval.eval(Parse.parse("(Smoke => Fire) => (~Smoke => ~Fire)"), {"Smoke": False, "Fire": False}))
        self.assertFalse(Eval.eval(Parse.parse("(Smoke => Fire) => (~Smoke => ~Fire)"), {"Smoke": False, "Fire": True}))
        self.assertTrue(Eval.eval(Parse.parse("(Smoke => Fire) => (~Smoke => ~Fire)"), {"Smoke": True, "Fire": False}))
        self.assertTrue(Eval.eval(Parse.parse("(Smoke => Fire) => (~Smoke => ~Fire)"), {"Smoke": True, "Fire": True}))

        self.assertTrue(Eval.eval(Parse.parse("a /\\ (b \\/ c)"), {"a": True, "b": False, "c": True}))
        self.assertFalse(Eval.eval(Parse.parse("(a /\\ b) \\/ c"), {"a": True, "b": False, "c": False}))
Example #27
def score_hand(hand, winning_tile, tsumo, ron, seat, prevalent_wind,
               first_turn, riichi, concealed, first_round, ippatsu, last_tile,
               d):
    """
    Symbols:
        - Coins:      [cn]
        - Bamboo:     [bn]
        - Characters: [kn]
        - Winds:
            - East:   [we]
            - South:  [ws]
            - West:   [ww]
            - North:  [wn]
        - Dragons:
            - Red:    [dr]
            - Green:  [dg]
            - White:  [dw]

    Note: For suits replace 'n' with the value of the tile. For red fives, replace 'n' with 'r'
    Note: Spaces between tiles may be used to help readability.

    Example Hand:
        '[c1][c2][c3] [dr][dr][dr] [we][we][we] [k1][k2][k3] [br][b5]'
         Hand Contents:
            - Chow of coins 1-3
            - Pung of Red Dragons
            - Pung of East Winds
            - Chow of Characters 1-3
            - Eyes of Bamboo 5 and red 5
    """
    tiles = Parse.parse_hand(hand)
    winning = Parse.parse_winning_tile(winning_tile)

    count = Counter(tiles)
    if d:
        for k in count.keys():
            print("%s %s" % (count[k], k))
        del count
        print("Winning Tile: %s" % winning)
        print()

    Fan.calculate_fan(tiles, winning, tsumo, ron, seat, prevalent_wind,
                      first_turn, riichi, concealed, first_round, ippatsu,
                      last_tile, d)
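A call using the example hand from the docstring might look like the following; the tile strings follow the documented notation, while the flag and wind values are only illustrative assumptions:

score_hand('[c1][c2][c3] [dr][dr][dr] [we][we][we] [k1][k2][k3] [br][b5]',
           '[k3]', tsumo=True, ron=False, seat='[we]', prevalent_wind='[we]',
           first_turn=False, riichi=False, concealed=True, first_round=False,
           ippatsu=False, last_tile=False, d=True)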
Example #28
def get_bot_code(code):
    file = urllib.request.urlopen("https://raw.githubusercontent.com/XenthisX/bot/master/bots/bot.clj").read()
    print(code)
    if (code == 'js'):
        file = urllib.request.urlopen("https://raw.githubusercontent.com/erichahn/wombats-python/master/bots/bottest.js")
        extension = "bottest.js"
    elif(code == 'py'):
        file = urllib.request.urlopen("https://raw.githubusercontent.com/erichahn/wombats-python/master/bots/bottest.py")
        extension = "bottest.py"
    return Parse.run_command(extension, file.read())
Example #29
def jsonOutputTextOnly(toExport, filename):
    l = []
    for tab in toExport:
        d = {}
        d['id'] = tab['idCDLI']
        d['text'] = Parse.getFullText(tab)
        l.append(d)
    f = open(filename, 'w')
    json.dump(l, f)
    f.close()
Example #30
def insertGlobalInvariant(C6, inv):
    """adds  inv,  a btree, to  C6's  globals  list of global invariants
       # adds  to  gvar all vtrees (vars) embedded within inv;

       adds global invariant to  rels  and  facts, since it is now
       established, and we are not allowed to change  any of the vars
       it mentions (except from within a fcn that declars the vars
       as globals.

       finally, ``locks'' the global vars from updates, by placing
       their names on the ``novars'' list.
       Only functions that mention the vars in its ``globals''
       clause can update the global vars
    """
    C6["globalinvs"] = C6["globalinvs"] + [inv]
    globals_in_inv = Parse.removeDuplicates(Parse.extractVarsFrom(inv))
    C6["novars"] = C6["novars"] + globals_in_inv
    #erase(C6, globals_in_inv)
    reset(C6, globals_in_inv)
Example #31
	def keyword(line):
		regex = re.compile(r"(\sif\s?)+|(\swhile\s?)+|(\sreturn\s?)+")
		regex_space = re.compile(r"(\sif\s)+|(\swhile\s)+|(\sreturn\s)+")
		res = regex.match(line.text)
		if res:
			res_text = res.group(0)
			last_res = regex_space.match(res_text)
			if not last_res:
				line.errors.append(Parse.Error("NOSPCKEY", res_text, "LINE", line))
		return
Example #32
 def parse_document(self):
     # extract all relevant document fields via the parser
     parser = Parse.Parse(self.constants, self.stop_words, self.is_stemming)
     parser.parse_document(self.content)
     self.length = parser.position
     self.max_tf = parser.max_tf
     self.docno = parser.docno
     self.terms = parser.terms
     del parser
     self.num_of_words = len(self.terms)
Example #33
	def includes(includes):
		sys = True
		sys_regex = re.compile(r"#\s*include\s*<[\s\S]+?>")
		usr_regex = re.compile(r"#\s*include\s*\"[\s\S]+?\"")
		for include in includes.lines:
			if sys_regex.match(include.text):
				if not sys:
					include.errors.append(Parse.Error("WRGLOCINCL", include.text, "INC", include))
			elif usr_regex.match(include.text):
				if sys:
					sys = False
Example #34
def on_all_valuations(formula):
    '''
    Use eval for all valuations. Do all return true?
    '''
    formula_tree = Parse.parse(formula)

    for v in all_valuations(atoms(formula_tree)):
        if not eval(formula_tree, v):
            return False

    return True
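Used as a quick tautology check, for example (connective syntax as in the Eval tests above):

# p or not-p holds under every valuation; p and not-p under none.
assert on_all_valuations("p \\/ ~p")
assert not on_all_valuations("p /\\ ~p")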
Example #35
def ReadFromFile(fileName):
    fs = open(fileName, 'r')
    n = int(fs.readline())
    type = fs.readline()
    type = type.rstrip('\n')
    k = 0
    equations = []
    for i in range(0, n):
        equations.append(fs.readline())
    temp = []
    IntialGuesses = []
    if type == 'seidel' or type == "All":
        temp = fs.read().split(" ")
        IntialGuesses = [int(numeric_string) for numeric_string in temp]
    if type == "All":
        Parse.Parse(equations, "LU", n, [], 0, 0)
        Parse.Parse(equations, "Gaussian-jordan", n, [], 0, 0)
        Parse.Parse(equations, "Gaussian-elimination", n, [], 0, 0)
        Parse.Parse(equations, "seidel", n, IntialGuesses, 50, 0.00001)
    if type == 'seidel':
        Parse.Parse(equations, type, n, IntialGuesses, 50, 0.00001)
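Based on the reads above, the input file is expected to hold the equation count, the method name ('LU', 'Gaussian-jordan', 'Gaussian-elimination', 'seidel' or 'All'), one equation per line, and, for 'seidel' or 'All', a final line of space-separated integer initial guesses. A hypothetical example (the equation syntax itself depends on Parse.Parse and is an assumption):

3
seidel
2x + y - z = 4
x + 3y + z = 6
x - y + 4z = 8
0 0 0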
Example #36
	def function(function):
		inside_func = False
		count_lines = 0
		for line in function.lines:
			if line.text == "{":
				inside_func = True
			elif line.text == "}":
				inside_func = False
			elif inside_func:
				count_lines += 1
			Norme.line(line)
		if count_lines > 25:
			function.errors.append(Parse.Error("NBFUNCLNS", "", "FUNC"))
Example #37
def lookupGlobalInvariants(C6, varnames):
    """finds in C6 all global invariants relevant to varnames:

       params:  C6,  varnames -  a sequence of  vtrees
       returns: a list of all the invariants saved in C6 that mention
        any variable within varnames
    """
    invlist = []
    for ginv in C6["globalinvs"] :
        foundmes = map(lambda v: Parse.foundIn(v, ginv),  varnames)
        if True in foundmes :
            #invlist = invlist + (ginv,)
            invlist.append(ginv)
    return invlist
Example #38
 def beginTransaction(self):
     print("\n--------------------------------------------------------")
     print("tranasction started")
     query = ""
     while not query.lower() == "quit":
         query = input()
         query_type = prs.Parse(self.database, query, self.queryProcessor)
         val = query_type.check_query()
         if val == -1:
             print(colored("Incorrect Query", 'red'))
         elif val == 0:
             break
     print("transaction ended")
     print("\n--------------------------------------------------------")
Example #39
def CreateTree(file, trees):
    image = Image.open(file)
    name = os.path.basename(file)
    mask = Image.open("masks/" + name + ".png")
    width = image.size[0]
    height = image.size[1]
    pix = mask.load()
    print(name)
    new_image = image.copy()
    new_mask = mask.copy()
    new_image_pixels = new_image.load()
    new_mask_pixels = new_mask.load()
    for j in range(height):
        for i in range(width):
            if pix[i, j] == (255, 0, 0, 255):
                (blob, pix) = GT.eat(i, j, pix)
                (w, h, w_min, h_min) = GT.get_round_size(blob)
                new_tree = GetTree(trees, w, h)
                tree_pix = new_tree.load()
                (new_image_pixels,
                 new_mask_pixels) = DrawTree(new_image_pixels, new_mask_pixels,
                                             tree_pix, w, h, w_min, h_min)
                (new_mask_pixels, new_blob) = Separate(new_mask_pixels,
                                                       (w // 2) + w_min,
                                                       (h // 2) + h_min)
                for k in range(len(new_blob)):
                    new_mask_pixels[new_blob[k][0],
                                    new_blob[k][1]] = (0, 0, 0, 255)
    blurredimg = new_image.copy()
    blurredimg = blurredimg.filter(ImageFilter.GaussianBlur(10))
    blurredomg_pix = blurredimg.load()
    for j in range(height):
        for i in range(width):
            if new_mask_pixels[i, j] == (255, 0, 0, 255):
                new_mask_pixels[i, j] = (255, 255, 255, 255)
                new_image_pixels[i, j] = blurredomg_pix[i, j]
    return (new_image, new_mask)
Example #40
def decrypt(encrypteddata,destination):
	header = encrypteddata[:7]
	encrypteddata = encrypteddata[7:]
	
	if refreshed == True:
		if destination == "client":
			decrypteddata = stc.decrypt(encrypteddata)
			msgid, version, paylen = parseheader(decrypteddata)
			parsedata = Parse.packparse(msgid, decrypteddata)
		elif destination == "server":
			decrypteddata = cts.decrypt(encrypteddata)
			msgid, version, paylen = parseheader(decrypteddata)
			parsedata = Parse.packparse(msgid, decrypteddata)
	elif refreshed == False:
		if destination == "client":
			decrypteddata = lstc.decrypt(encrypteddata)
			msgid, version, paylen = parseheader(decrypteddata)
			parsedata = Parse.packparse(msgid, decrypteddata)
		elif destination == "server":
			decrypteddata = lcts.decrypt(encrypteddata)
			msgid, version, paylen = parseheader(decrypteddata)
			parsedata = Parse.packparse(msgid, decrypteddata)
			
	return decrypteddata
Example #41
def main1():
    dat = util.get_labeled_questions(str("data/nt-13588_2.tsv"), "data")
    fLog = sys.stdout
    for i, qinfo in enumerate(dat, 1):
        if qinfo.seq_qid[-1] != '0':
            parse = Parse()
            parse.type = Parse.FollowUp
            cond = Condition(3, Condition.OpEqRow, 7)
            parse.conditions = [cond]
            pred = parse.run(qinfo, resinfo)

            fLog.write("(%s) %s\n" % (qinfo.seq_qid, qinfo.question))
            fLog.write("Answer: %s\n" % ", ".join(
                ["(%d,%d)" % coord for coord in qinfo.answer_coordinates]))
            fLog.write("Predictions: %s\n" %
                       ", ".join(["(%d,%d)" % coord for coord in pred]))
            fLog.write("\n")
            fLog.flush()

        # use the gold answers
        resinfo = util.ResultInfo(qinfo.seq_qid, qinfo.question,
                                  qinfo.ques_word_sequence,
                                  qinfo.answer_coordinates,
                                  qinfo.answer_column_idx)
Example #42
    def __init__(self, parent, id):
        super(PipeWindow, self).__init__(parent)

        self.widget =  QWidget()
        self.layout =  QGridLayout(self.widget)
        self.book = Parse.Sizing()

        self.setCentralWidget(self.widget)



        #elements in the PipeWindow
        self.widget_format()

        self.setWindowTitle(str(id))
        self.show()
Example #43
def first_pass(input_file):
    """
    Scan the entire file for labels, e.g (xxx), add the pair (xxx, address) to
    the symbols table, where address is the number of the instruction
    following (xxx)
    :param input_file: file to translate to Hack binary code
    :return: None
    """
    i = 0  # set counter for line instructions
    for ln in input_file:
        ln = ln.strip()
        if not Parse.ignore_line(ln) and not ln.startswith("("):
            i += 1  # counter for label symbols value
        key = ln[1:-1]  # remove bracket from label
        if ln.startswith("("):
            Code_tables.symbols_table[key] = i
Example #44
    def disasmCommands(co_code):
        """
		@param co_code: bytecode.
		@return: array of L{Command} class instances.
		"""
        commands = []
        i = 0
        border = len(co_code)
        while i < border:
            offset = i
            opcode = struct.unpack("=B", co_code[i])[0]
            i += 1
            name = None
            argument = None
            if opcode in Opcodes.opcodes:
                name = Opcodes.opcodes[opcode][0]
                if Opcodes.opcodes[opcode][1] != 0:
                    argument = Parse.getInt(co_code[i : i + Opcodes.opcodes[opcode][1]])
                    i += Opcodes.opcodes[opcode][1]
            commands.append(Command(offset, opcode, name, argument))
        return commands
Example #45
def RunMerge(args):
	cfg = Parse.generate_merge_cfg(args)
	Parse.print_merge_options(cfg)

	if not cfg['debug']:
		logging.disable(logging.CRITICAL)

	regions_df = pd.read_table(cfg['region_file'], compression='gzip' if cfg['region_file'].split('.')[-1] == 'gz' else None)
	regions_df = regions_df[regions_df['job'] == int(cfg['job'])].reset_index(drop=True)
	return_values = {}
	print ''
	try:
		bgzfile = bgzf.BgzfWriter(cfg['out'] + '.gz', 'wb')
	except:
		print Process.Error("failed to initialize bgzip format out file " + cfg['out'] + '.gz').out
		return 1

	if cfg['cpus'] > 1:
		pool = mp.Pool(cfg['cpus']-1)
		for i in xrange(1,cfg['cpus']):
			return_values[i] = pool.apply_async(process_regions, args=(regions_df,cfg,i,True,))
			print "submitting job on cpu " + str(i) + " of " + str(cfg['cpus'])
		pool.close()
		print "executing job for cpu " + str(cfg['cpus']) + " of " + str(cfg['cpus']) + " via main process"
		main_return = process_regions(regions_df,cfg,cfg['cpus'],True)
		pool.join()

		if 1 in [return_values[i].get() for i in return_values] or main_return == 1:
			print Process.Error("error detected, see log files").out
			return 1

	else:
		main_return = process_regions(regions_df,cfg,1,True)
		if main_return == 1:
			print Process.Error("error detected, see log files").out
			return 1

	for i in xrange(1,cfg['cpus']+1):
		try:
			logfile = open(cfg['out'] + '.cpu' + str(i) + '.log', 'r')
		except:
			print Process.Error("failed to initialize log file " + cfg['out'] + '.cpu' + str(i) + '.log').out
			return 1
		print logfile.read()
		logfile.close()
		os.remove(cfg['out'] + '.cpu' + str(i) + '.log')

	written = False
	for i in xrange(1,cfg['cpus']+1):
		out = '/'.join(cfg['out'].split('/')[0:-1]) + '/' + cfg['out'].split('/')[-1] + '.cpu' + str(i) + '.pkl'
		pkl = open(out,"rb")
		results_final,results_header = pickle.load(pkl)
		if not written:
			bgzfile.write('#' + '\t'.join(results_header) + '\n')
			written = True
		if results_final.shape[0] > 0:
			results_final.replace({'None': 'NA', 'nan': 'NA'}).to_csv(bgzfile, index=False, sep='\t', header=False, na_rep='NA', float_format='%.5g', columns = results_header, append=True)
		pkl.close()
		os.remove(out)

	bgzfile.close()
	print "indexing out file"
	try:
		pysam.tabix_index(cfg['out'] + '.gz',seq_col=0,start_col=1,end_col=1,force=True)
	except:
		print Process.Error('failed to generate index for file ' + cfg['out'] + '.gz').out
		return 1

	if cfg['snpeff']:
		from ConfigParser import SafeConfigParser
		from pkg_resources import resource_filename
		import subprocess
		import xlsxwriter
		import time
		ini = SafeConfigParser()
		ini.read(resource_filename('uga', 'settings.ini'))

		results_final = pd.read_table(cfg['out'] + '.gz')
		outdf = results_final[['#chr','pos','id','a1','a2']]
		outdf = outdf.rename(columns={'#chr':'#CHROM','pos':'POS','id':'ID','a1':'REF','a2':'ALT'})
		outdf['QUAL'] = None
		outdf['FILTER'] = None
		outdf['INFO'] = None
		outdf.to_csv(cfg['out'] + '.annot1',header=True, index=False, sep='\t')

		time.sleep(1)
		try:
			cmd = 'java -jar ' + ini.get('main','snpeff') + ' -s ' + cfg['out'] + '.annot.summary.html -v -canon GRCh37.75 ' + cfg['out'] + '.annot1 > ' + cfg['out'] + '.annot2'
			print cmd
			p = subprocess.Popen(cmd,shell=True)
			p.wait()
		except KeyboardInterrupt:
			kill_all(p.pid)
			print "canonical annotation process terminated by user"
			sys.exit(1)

		return
		time.sleep(1)
		try:
			cmd = 'java -jar ' + ini.get('main','snpsift') + ' extractFields -s "," -e "NA" ' + cfg['out'] + '.annot2 CHROM POS ID REF ALT "ANN[*].ALLELE" "ANN[*].EFFECT" "ANN[*].IMPACT" "ANN[*].GENE" "ANN[*].GENEID" "ANN[*].FEATURE" "ANN[*].FEATUREID" "ANN[*].BIOTYPE" "ANN[*].RANK" "ANN[*].HGVS_C" "ANN[*].HGVS_P" "ANN[*].CDNA_POS" "ANN[*].CDNA_LEN" "ANN[*].CDNA_LEN" "ANN[*].CDS_POS" "ANN[*].CDS_LEN" "ANN[*].AA_POS" "ANN[*].AA_LEN" "ANN[*].DISTANCE" "ANN[*].ERRORS" | sed "s/ANN\[\*\]/ANN/g" > ' + cfg['out'] + '.annot'
			print cmd
			p = subprocess.Popen(cmd,shell=True)
			p.wait()
		except KeyboardInterrupt:
			kill_all(p.pid)
			print "SnpSift annotation process terminated by user"
			sys.exit(1)
		os.remove(cfg['out'] + '.annot1')
		os.remove(cfg['out'] + '.annot2')

		results_final = results_final.rename(columns={'#chr':'#CHROM','pos':'POS','id':'ID','a1':'REF','a2':'ALT'})
		annot = pd.read_table(cfg['out'] + '.annot')
		out = results_final.merge(annot,how='outer')

		out.fillna('NA',inplace=True)

		wkbk = xlsxwriter.Workbook(cfg['out'] + '.annot.xlsx')
		wksht = wkbk.add_worksheet()

		header_format = wkbk.add_format({'bold': True,
											 'align': 'center',
											 'valign': 'vcenter'})
		string_format = wkbk.add_format({'align': 'center', 'valign': 'center'})
		float_format = wkbk.add_format({'align': 'center', 'valign': 'center'})
		float_format.set_num_format('0.000')
		integer_format = wkbk.add_format({'align': 'center', 'valign': 'center'})
		integer_format.set_num_format('0')
		sci_format = wkbk.add_format({'align': 'center', 'valign': 'center'})
		sci_format.set_num_format('0.00E+00')
		i = 0
		for field in out.columns:
			wksht.write(0,i,field,header_format)
			i += 1

		i = 0
		for row in range(out.shape[0]):
			j = 0
			for field in out.columns:
				if field in ['#CHROM','POS'] or field.endswith('.filtered') or field.endswith('.n'):
					wksht.write(row+1,j,out[field][i], integer_format)
				elif field.endswith(('.p','hwe','hwe.unrel')):
					wksht.write(row+1,j,out[field][i], sci_format)
				elif field.endswith(('.effect','.stderr','.or','.z','freq','freq.unrel','rsq','rsq.unrel','callrate')):
					wksht.write(row+1,j,out[field][i], float_format)
				else:
					wksht.write(row+1,j,out[field][i], string_format)
				j += 1
			i += 1
		wksht.freeze_panes(1, 0)
		wkbk.close()

		os.remove(cfg['out'] + '.annot')

	print "process complete"
	return 0
Example #46
def RunFilter(args):
	cfg = Parse.generate_filter_cfg(args)
	Parse.print_filter_options(cfg)

	if not cfg['debug']:
		logging.disable(logging.CRITICAL)

	print ''
	print "loading file header"
	try:
		handle=pysam.TabixFile(filename=cfg['file'],parser=pysam.asTuple())
	except:
		print Process.Error("unable to load file header").out
		return 1
	header = [x for x in handle.header]
	cols = header[-1].split()

	found = True
	if not cfg['pcol'] in cols:
		print Process.Error("p-value column, --pcol, not found").out
		found = False
	if not cfg['bpcol'] in cols:
		print Process.Error("genomic position column, --bpcol, not found").out
		found = False
	if cfg['miss'] is not None and not cfg['misscol'] in cols:
		print Process.Error("callrate column, --misscol, not found; required for --miss option").out
		found = False
	if cfg['maf'] is not None and not cfg['freqcol'] in cols:
		print Process.Error("allele frequency column, --freqcol, not found; required for --maf option").out
		found = False
	if cfg['mac'] is not None and not cfg['maccol'] in cols:
		print Process.Error("minor allele count column, --maccol, not found; required for --mac option").out
		found = False
	if cfg['cmac'] is not None and not cfg['cmaccol'] in cols:
		print Process.Error("cumulative minor allele count column, --cmaccol, not found; required for --cmac option").out
		found = False
	if cfg['rsq'] is not None and not cfg['rsqcol'] in cols:
		print Process.Error("imputation quality (rsq) column, --rsqcol, not found; required for --rsq option").out
		found = False
	if cfg['hwe'] is not None and not cfg['hwecol'] in cols:
		print Process.Error("Hardy Weinberg p-value column, --hwecol, not found; required for --hwe option").out
		found = False
	if cfg['hwe_maf'] is not None and (not cfg['hwecol'] in cols or not cfg['freqcol'] in cols):
		print Process.Error("either Hardy Weinberg p-value or allele frequency column, --hwecol or --freqcol, not found; both required for --hwe-maf option").out
		found = False
	if not found:
		return 1

	print "reading data from file"
	skip_rows = len(header)-1
	cols = header[-1].split()
	try:
		r = pd.read_table(cfg['file'],sep='\t',skiprows=skip_rows,compression='gzip')
	except:
		print Process.Error("unable to read data from file " + cfg['file']).out
		return 1
	r = r.loc[~ np.isnan(r[cfg['pcol']])]
	print str(r.shape[0]) + " results found with valid p-values"

	nsnps = r.shape[0]
	if cfg['miss'] is not None:
		r = r.loc[r[cfg['misscol']] >= cfg['miss']]
	if cfg['maf'] is not None:
		r = r.loc[(r[cfg['freqcol']] >= cfg['maf']) & (r[cfg['freqcol']] <= 1-cfg['maf'])]
	if cfg['mac'] is not None:
		r = r.loc[r[cfg['maccol']] >= cfg['mac']]
	if cfg['cmac'] is not None:
		r = r.loc[r[cfg['cmaccol']] >= cfg['cmac']]
	if cfg['rsq'] is not None:
		r = r.loc[(~ np.isnan(r[cfg['rsqcol']])) & (r[cfg['rsqcol']] >= cfg['rsq'])]
	if cfg['hwe'] is not None:
		if cfg['hwe_maf'] is not None:
			r = r.loc[(~ np.isnan(r[cfg['hwecol']])) & (~ np.isnan(r[cfg['freqcol']])) & (~ (r[cfg['freqcol']] >= cfg['hwe_maf']) & (r[cfg['hwecol']] < cfg['hwe']))]
		else:
			r = r.loc[(~ np.isnan(r[cfg['hwecol']])) & (r[cfg['hwecol']] >= cfg['hwe'])]
	print str(r.shape[0]) + " results remain after filtering, " + str(nsnps - r.shape[0]) + " removed"

	if cfg['gc']:
		l = np.median(scipy.chi2.ppf([1-x for x in r.loc[~ np.isnan(r[cfg['pcol']]),cfg['pcol']].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
		print "genomic inflation = " + str(l)

		if cfg['stderrcol'] in r.columns:
			print "adjusting stderr"
			r[cfg['stderrcol']] = r[cfg['stderrcol']] * math.sqrt(l)
		if cfg['waldcol'] in r.columns:
			print "adjusting wald statistic"
			r[cfg['waldcol']] = r[cfg['waldcol']] / math.sqrt(l)
			print "calculating corrected p-value from wald statistic"
			r[cfg['pcol']] = scipy.chisqprob(r[cfg['waldcol']],1)
		elif cfg['zcol'] in r.columns:
			print "adjusting z statistic"
			r[cfg['zcol']] = r[cfg['zcol']] / math.sqrt(l)
			print "calculating corrected p-value from z statistic"
			r[cfg['pcol']] = 2 * scipy.norm.cdf(-1 * np.abs(r[cfg['zcol']]))
		elif cfg['effectcol'] in r.columns and cfg['stderrcol'] in r.columns:
			print "calculating corrected p-value from effect and stderr using a calculated z statistic"
			r[cfg['pcol']] = 2 * scipy.norm.cdf(-1 * np.abs(r[cfg['effectcol']]) / r[cfg['stderrcol']])
		else:
			print "calculating corrected p-value from existing p-value using an estimated z statistic"
			r[cfg['pcol']] = 2 * scipy.norm.cdf(-1 * np.abs(scipy.norm.ppf(0.5*r[cfg['pcol']]) / math.sqrt(l)))

	print "writing filtered results to file"
	try:
		bgzfile = bgzf.BgzfWriter(cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz'), 'wb')
	except:
		print Process.Error("unable to initialize out file " + cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz')).out
		return 1
	bgzfile.write('\n'.join([x for x in handle.header]) + '\n')
	r[cols].to_csv(bgzfile,header=False,index=False,sep="\t",na_rep='NA', float_format='%.5g')
	bgzfile.close()
	handle.close()

	print "indexing out file"
	try:
		pysam.tabix_index(cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz'),seq_col=0,start_col=r.columns.get_loc(cfg['bpcol']),end_col=r.columns.get_loc(cfg['bpcol']),force=True)
	except:
		print Process.Error('failed to generate index for file ' + cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz')).out
		return 1

	print "process complete"
	return 0
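The genomic-control step above estimates lambda as the median observed 1-df chi-square statistic (back-transformed from the p-values) divided by the expected median chi-square, qchisq(0.5, df=1). A small standalone sketch of the same calculation using scipy.stats:

import numpy as np
from scipy import stats

def genomic_inflation(pvalues):
    # Convert two-sided p-values to 1-df chi-square statistics and
    # compare their median to the null median (about 0.455).
    chisq = stats.chi2.ppf([1 - p for p in pvalues], df=1)
    return np.median(chisq) / stats.chi2.ppf(0.5, df=1)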
Example #47
def RunSnvplot(args):
	cfg = Parse.generate_snvplot_cfg(args)
	Parse.print_snvplot_options(cfg)

	if not cfg['debug']:
		logging.disable(logging.CRITICAL)

	ro.r('suppressMessages(library(ggplot2))')
	ro.r('suppressMessages(library(grid))')

	handle=pysam.TabixFile(filename=cfg['file'],parser=pysam.asVCF())
	header = [x for x in handle.header]
	skip_rows = len(header)-1
	cols = header[-1].split()
	pcols = cfg['pcol'].split(',')
	cols_extract = [cfg['chrcol'],cfg['bpcol']] + pcols
	if cfg['qq_strat_freq']:
		if cfg['freqcol'] not in cols:
			print Process.Error("frequency column " + cfg['freqcol'] + " not found, unable to proceed with frequency stratified plots").out
			return 1
		else:
			cols_extract = cols_extract + [cfg['freqcol']]
			print "frequency column " + cfg['freqcol'] + " found"
	if cfg['qq_strat_mac']:
		if cfg['maccol'] not in cols:
			print Process.Error("minor allele count column " + cfg['maccol'] + " not found, unable to proceed with minor allele count stratified plots").out
			return 1
		else:
			cols_extract = cols_extract + [cfg['maccol']]
			print "minor allele count column " + cfg['maccol'] + " found"

	print "importing data"
	r = pd.read_table(cfg['file'],sep='\t',skiprows=skip_rows,usecols=cols_extract,compression='gzip')
	print str(r.shape[0]) + " total variants found"

	for pcol in pcols:
		print "plotting p-values for column " + pcol + " ..."
		results = r[[cfg['chrcol'],cfg['bpcol'],cfg['freqcol'],pcol]] if cfg['freqcol'] in r else r[[cfg['chrcol'],cfg['bpcol'],pcol]]
		results.dropna(inplace=True)
		results = results[(results[pcol] > 0) & (results[pcol] <= 1)].reset_index(drop=True)
		print "   " + str(results.shape[0]) + " variants with plottable p-values"

		results['logp'] = -1 * np.log10(results[pcol]) + 0.0

		ro.globalenv['results'] = results
		l = np.median(scipy.chi2.ppf([1-x for x in results[pcol].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
		# in R: median(qchisq(results$p, df=1, lower.tail=FALSE))/qchisq(0.5,1)
		print "   genomic inflation (all variants) = " + str(l)

		if cfg['qq']:
			print "   generating standard qq plot"
			print "   minimum p-value: " + str(np.min(results[pcol]))
			a = -1 * np.log10(ro.r('ppoints(' + str(len(results.index)) + ')'))
			a.sort()
			results.sort_values(by=['logp'], inplace=True)
			print "   maximum -1*log10(p-value): " + str(np.max(results['logp']))

			ci_upper = -1 * np.log10(scipy.beta.ppf(0.95, range(1,len(results[pcol]) + 1), range(len(results[pcol]),0,-1)))
			ci_upper.sort()
			ci_lower = -1 * np.log10(scipy.beta.ppf(0.05, range(1,len(results[pcol]) + 1), range(len(results[pcol]),0,-1)))
			ci_lower.sort()
			
			ro.globalenv['df'] = ro.DataFrame({'a': ro.FloatVector(a), 'b': ro.FloatVector(results['logp']), 'ci_lower': ro.FloatVector(ci_lower), 'ci_upper': ro.FloatVector(ci_upper)})
			dftext_label = 'lambda %~~% ' + str(l)
			ro.globalenv['dftext'] = ro.DataFrame({'x': ro.r('Inf'), 'y': 0.5, 'lab': dftext_label})

			if cfg['ext'] == 'tiff':
				ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.qq.tiff')
			elif cfg['ext'] == 'eps':
				ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.qq.eps')
			else:
				ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.qq.pdf')
			ro.r("""
				gp<-ggplot(df)
				pp<-gp + 
					aes_string(x='a',y='b') +
					geom_ribbon(aes_string(x='a',ymin='ci_lower',ymax='ci_upper'), data=df, alpha=0.25, fill='black') + 
					geom_point(size=2) +
					geom_abline(intercept=0, slope=1, alpha=0.5) + 
					scale_x_discrete(expression(Expected~~-log[10](italic(p)))) +
					scale_y_discrete(expression(Observed~~-log[10](italic(p)))) +
					coord_fixed() +
					theme_bw(base_size = 12) + 
					geom_text(aes_string(x='x', y='y', label='lab'), data = dftext, colour="black", vjust=0, hjust=1, size = 4, parse=TRUE) +
					theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), legend.position = 'none', 
						panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
						panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.text = element_text(size=12))
				%s
				""" % (ggsave))

			if np.max(results['logp']) > cfg['crop']:
				print "   generating cropped standard qq plot"
				ro.r('df$b[df$b > ' + str(cfg['crop']) + ']<-' + str(cfg['crop']))
				ro.r('df$shape<-0')
				ro.r('df$shape[df$b == ' + str(cfg['crop']) + ']<-1')
				if cfg['ext'] == 'tiff':
					ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.qq.cropped.tiff')
				elif cfg['ext'] == 'eps':
					ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.qq.cropped.eps')
				else:
					ggsave = 'ggsave(filename="%s",plot=pp,width=4,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.qq.cropped.pdf')
				ro.r("""
					gp<-ggplot(df)
					pp<-gp + 
						aes_string(x='a',y='b') +
						geom_ribbon(aes_string(x='a',ymin='ci_lower',ymax='ci_upper'), data=df, alpha=0.25, fill='black') + 
						geom_point(aes(shape=factor(shape)),size=2) +
						geom_abline(intercept=0, slope=1, alpha=0.5) + 
						scale_x_discrete(expression(Expected~~-log[10](italic(p)))) +
						scale_y_discrete(expression(Observed~~-log[10](italic(p)))) +
						coord_fixed() +
						theme_bw(base_size = 12) + 
						geom_text(aes_string(x='x', y='y', label='lab'), data = dftext, colour="black", vjust=0, hjust=1, size = 4, parse=TRUE) +
						theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), legend.position = 'none', 
							panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
							panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.text = element_text(size=12))
					%s
					""" % (ggsave))

		if cfg['qq_strat_freq']:
			print "   generating frequency stratified qq plot"
			
			
			
			strat_ticks = [0.005, 0.01, 0.03, 0.05]
			
			
			
			
			results['UGA___QQ_BIN___'] = 'E'
			
			results.loc[(results[cfg['freqcol']] >= 0.01) & (results[cfg['freqcol']] <= 0.99),'UGA___QQ_BIN___'] = 'D'
			results.loc[(results[cfg['freqcol']] >= 0.03) & (results[cfg['freqcol']] <= 0.97),'UGA___QQ_BIN___'] = 'C'
			results.loc[(results[cfg['freqcol']] >= 0.05) & (results[cfg['freqcol']] <= 0.95),'UGA___QQ_BIN___'] = 'B'
			results.loc[(results[cfg['freqcol']] >= 0.1) & (results[cfg['freqcol']] <= 0.9),'UGA___QQ_BIN___'] = 'A'
			lA='NA'
			lB='NA'
			lC='NA'
			lD='NA'
			lE='NA'
			lE_n=len(results[pcol][(results[cfg['freqcol']] < 0.01) | (results[cfg['freqcol']] > 0.99)])
			lD_n=len(results[pcol][((results[cfg['freqcol']] >= 0.01) & (results[cfg['freqcol']] < 0.03)) | ((results[cfg['freqcol']] <= 0.99) & (results[cfg['freqcol']] > 0.97))])
			lC_n=len(results[pcol][((results[cfg['freqcol']] >= 0.03) & (results[cfg['freqcol']] < 0.05)) | ((results[cfg['freqcol']] <= 0.97) & (results[cfg['freqcol']] > 0.95))])
			lB_n=len(results[pcol][((results[cfg['freqcol']] >= 0.05) & (results[cfg['freqcol']] < 0.1)) | ((results[cfg['freqcol']] <= 0.95) & (results[cfg['freqcol']] > 0.9))])
			lA_n=len(results[pcol][(results[cfg['freqcol']] >= 0.1) & (results[cfg['freqcol']] <= 0.9)])
			if lE_n > 0:
				lE=np.median(scipy.chi2.ppf([1-x for x in results[pcol][(results[cfg['freqcol']] < 0.01) | (results[cfg['freqcol']] > 0.99)].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
			if lD_n > 0:
				lD=np.median(scipy.chi2.ppf([1-x for x in results[pcol][((results[cfg['freqcol']] >= 0.01) & (results[cfg['freqcol']] < 0.03)) | ((results[cfg['freqcol']] <= 0.99) & (results[cfg['freqcol']] > 0.97))].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
			if lC_n > 0:
				lC=np.median(scipy.chi2.ppf([1-x for x in results[pcol][((results[cfg['freqcol']] >= 0.03) & (results[cfg['freqcol']] < 0.05)) | ((results[cfg['freqcol']] <= 0.97) & (results[cfg['freqcol']] > 0.95))].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
			if lB_n > 0:
				lB=np.median(scipy.chi2.ppf([1-x for x in results[pcol][((results[cfg['freqcol']] >= 0.05) & (results[cfg['freqcol']] < 0.1)) | ((results[cfg['freqcol']] <= 0.95) & (results[cfg['freqcol']] > 0.9))].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
			if lA_n > 0:
				lA=np.median(scipy.chi2.ppf([1-x for x in results[pcol][(results[cfg['freqcol']] >= 0.1) & (results[cfg['freqcol']] <= 0.9)].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
			print "   genomic inflation (MAF >= 10%, n=" + str(lA_n) + ") = " + str(lA)
			print "   genomic inflation (5% <= MAF < 10%, n=" + str(lB_n) + ") = " + str(lB)
			print "   genomic inflation (3% <= MAF < 5%, n=" + str(lC_n) + ") = " + str(lC)
			print "   genomic inflation (1% <= MAF < 3%, n=" + str(lD_n) + ") = " + str(lD)
			print "   genomic inflation (MAF < 1%, n=" + str(lE_n) + ") = " + str(lE)

			a = np.array([])
			b = np.array([])
			c = np.array([])
			results.sort_values(by=['logp'], inplace=True)
			if len(results[results['UGA___QQ_BIN___'] == 'E'].index) > 0:
				aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA___QQ_BIN___'] == 'E'].index)) + ')'))
				aa.sort()
				bb = results['logp'][results['UGA___QQ_BIN___'] == 'E']
				#bb.sort()
				cc = results['UGA___QQ_BIN___'][results['UGA___QQ_BIN___'] == 'E']
				a = np.append(a,aa)
				b = np.append(b,bb)
				c = np.append(c,cc)
				print "   minimum p-value (MAF < 1%): " + str(np.min(results[pcol][results['UGA___QQ_BIN___'] == 'E']))
				print "   maximum -1*log10(p-value) (MAF < 1%): " + str(np.max(results['logp'][results['UGA___QQ_BIN___'] == 'E']))
			if len(results[results['UGA___QQ_BIN___'] == 'D'].index) > 0:
				aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA___QQ_BIN___'] == 'D'].index)) + ')'))
				aa.sort()
				bb = results['logp'][results['UGA___QQ_BIN___'] == 'D']
				#bb.sort()
				cc = results['UGA___QQ_BIN___'][results['UGA___QQ_BIN___'] == 'D']
				a = np.append(a,aa)
				b = np.append(b,bb)
				c = np.append(c,cc)
				print "   minimum p-value (1% <= MAF < 3%): " + str(np.min(results[pcol][results['UGA___QQ_BIN___'] == 'D']))
				print "   maximum -1*log10(p-value) (1% <= MAF < 3%): " + str(np.max(results['logp'][results['UGA___QQ_BIN___'] == 'D']))
			if len(results[results['UGA___QQ_BIN___'] == 'C'].index) > 0:
				aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA___QQ_BIN___'] == 'C'].index)) + ')'))
				aa.sort()
				bb = results['logp'][results['UGA___QQ_BIN___'] == 'C']
				#bb.sort()
				cc = results['UGA___QQ_BIN___'][results['UGA___QQ_BIN___'] == 'C']
				a = np.append(a,aa)
				b = np.append(b,bb)
				c = np.append(c,cc)
				print "   minimum p-value (3% <= MAF < 5%): " + str(np.min(results[pcol][results['UGA___QQ_BIN___'] == 'C']))
				print "   maximum -1*log10(p-value) (3% <= MAF < 5%): " + str(np.max(results['logp'][results['UGA___QQ_BIN___'] == 'C']))
			if len(results[results['UGA___QQ_BIN___'] == 'B'].index) > 0:
				aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA___QQ_BIN___'] == 'B'].index)) + ')'))
				aa.sort()
				bb = results['logp'][results['UGA___QQ_BIN___'] == 'B']
				#bb.sort()
				cc = results['UGA___QQ_BIN___'][results['UGA___QQ_BIN___'] == 'B']
				a = np.append(a,aa)
				b = np.append(b,bb)
				c = np.append(c,cc)
				print "   minimum p-value (5% <= MAF < 10%): " + str(np.min(results[pcol][results['UGA___QQ_BIN___'] == 'B']))
				print "   maximum -1*log10(p-value) (5% <= MAF < 10%): " + str(np.max(results['logp'][results['UGA___QQ_BIN___'] == 'B']))
			if len(results[results['UGA___QQ_BIN___'] == 'A'].index) > 0:
				aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA___QQ_BIN___'] == 'A'].index)) + ')'))
				aa.sort()
				bb = results['logp'][results['UGA___QQ_BIN___'] == 'A']
				#bb.sort()
				cc = results['UGA___QQ_BIN___'][results['UGA___QQ_BIN___'] == 'A']
				a = np.append(a,aa)
				b = np.append(b,bb)
				c = np.append(c,cc)
				print "   minimum p-value (MAF >= 10%): " + str(np.min(results[pcol][results['UGA___QQ_BIN___'] == 'A']))
				print "   maximum -1*log10(p-value) (MAF >= 10%): " + str(np.max(results['logp'][results['UGA___QQ_BIN___'] == 'A']))
        
			ro.globalenv['df'] = ro.DataFrame({'a': ro.FloatVector(a), 'b': ro.FloatVector(b), 'UGA___QQ_BIN___': ro.StrVector(c)})
        
			if cfg['ext'] == 'tiff':
				ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.qq_strat_freq.tiff')
			elif cfg['ext'] == 'eps':
				ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.qq_strat_freq.eps')
			else:
				ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.qq_strat_freq.pdf')
			ro.r("""
				gp<-ggplot(df, aes_string(x='a',y='b')) +
					geom_point(aes_string(color='UGA___QQ_BIN___'), size=2) +
					scale_colour_manual(values=c("E"="#a8ddb5", "D"="#7bccc4", "C"="#4eb3d3", "B"="#2b8cbe", "A"="#08589e"), labels=c("E"="MAF < 1%%","D"="1%% <= MAF < 3%%","C"="3%% <= MAF < 5%%","B"="5%% <= MAF < 10%%","A"="MAF >= 10%%")) +
					geom_abline(intercept=0, slope=1, alpha=0.5) + 
					scale_x_discrete(expression(Expected~~-log[10](italic(p)))) +
					scale_y_discrete(expression(Observed~~-log[10](italic(p)))) +
					coord_fixed() +
					theme_bw(base_size = 12) + 
					theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), legend.title = element_blank(), 
						legend.key.height = unit(0.1,"in"), legend.text = element_text(size=5), legend.key = element_blank(), legend.justification = c(0,1), 
						legend.position = c(0,1), panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
						panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.text = element_text(size=12))
				%s
				""" % (ggsave))
        
			if np.max(results['logp']) > cfg['crop']:
				print "   generating cropped frequency stratified qq plot"
				ro.r('df$b[df$b > ' + str(cfg['crop']) + ']<-' + str(cfg['crop']))
				ro.r('df$shape<-0')
				ro.r('df$shape[df$b == ' + str(cfg['crop']) + ']<-1')
				if cfg['ext'] == 'tiff':
					ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.qq_strat_freq.cropped.tiff')
				elif cfg['ext'] == 'eps':
					ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.qq_strat_freq.cropped.eps')
				else:
					ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.qq_strat_freq.cropped.pdf')
				ro.r("""
					gp<-ggplot(df, aes_string(x='a',y='b')) +
						geom_point(aes(shape=factor(shape), color=UGA___QQ_BIN___), size=2) +
						scale_colour_manual(values=c("E"="#a8ddb5", "D"="#7bccc4", "C"="#4eb3d3", "B"="#2b8cbe", "A"="#08589e"), labels=c("E"="MAF < 1%%","D"="1%% <= MAF < 3%%","C"="3%% <= MAF < 5%%","B"="5%% <= MAF < 10%%","A"="MAF >= 10%%")) +
						geom_abline(intercept=0, slope=1, alpha=0.5) + 
						scale_x_discrete(expression(Expected~~-log[10](italic(p)))) +
						scale_y_discrete(expression(Observed~~-log[10](italic(p)))) +
						coord_fixed() +
						theme_bw(base_size = 12) + 
						guides(shape=FALSE) + 
						theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), legend.title = element_blank(), 
							legend.key.height = unit(0.1,"in"), legend.text = element_text(size=5), legend.key = element_blank(), legend.justification = c(0,1), 
							legend.position = c(0,1), panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
							panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.text = element_text(size=12))
					%s
					""" % (ggsave))
					
		#if cfg['qq_strat_mac']:
		#	print "   generating minor allele count stratified qq plot"
		#
		#	results['UGA_MAC'] = 'E'
		#	results.loc[results[cfg['maccol']] < 5,'UGA_MAC'] = 'D'
		#	results.loc[(results[cfg['maccol']] >= 0.03) & (results[cfg['maccol']] <= 0.97),'UGA_MAC'] = 'C'
		#	results.loc[(results[cfg['maccol']] >= 0.05) & (results[cfg['maccol']] <= 0.95),'UGA_MAC'] = 'B'
		#	results.loc[(results[cfg['maccol']] >= 0.1) & (results[cfg['maccol']] <= 0.9),'UGA_MAC'] = 'A'
		#	lA='NA'
		#	lB='NA'
		#	lC='NA'
		#	lD='NA'
		#	lE='NA'
		#	lE_n=len(results[pcol][(results[cfg['maccol']] < 0.01) | (results[cfg['maccol']] > 0.99)])
		#	lD_n=len(results[pcol][((results[cfg['maccol']] >= 0.01) & (results[cfg['maccol']] < 0.03)) | ((results[cfg['maccol']] <= 0.99) & (results[cfg['maccol']] > 0.97))])
		#	lC_n=len(results[pcol][((results[cfg['maccol']] >= 0.03) & (results[cfg['maccol']] < 0.05)) | ((results[cfg['maccol']] <= 0.97) & (results[cfg['maccol']] > 0.95))])
		#	lB_n=len(results[pcol][((results[cfg['maccol']] >= 0.05) & (results[cfg['maccol']] < 0.1)) | ((results[cfg['maccol']] <= 0.95) & (results[cfg['maccol']] > 0.9))])
		#	lA_n=len(results[pcol][(results[cfg['maccol']] >= 0.1) & (results[cfg['maccol']] <= 0.9)])
		#	if lE_n > 0:
		#		lE=np.median(scipy.chi2.ppf([1-x for x in results[pcol][(results[cfg['maccol']] < 0.01) | (results[cfg['maccol']] > 0.99)].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
		#	if lD_n > 0:
		#		lD=np.median(scipy.chi2.ppf([1-x for x in results[pcol][((results[cfg['maccol']] >= 0.01) & (results[cfg['maccol']] < 0.03)) | ((results[cfg['maccol']] <= 0.99) & (results[cfg['maccol']] > 0.97))].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
		#	if lC_n > 0:
		#		lC=np.median(scipy.chi2.ppf([1-x for x in results[pcol][((results[cfg['maccol']] >= 0.03) & (results[cfg['maccol']] < 0.05)) | ((results[cfg['maccol']] <= 0.97) & (results[cfg['maccol']] > 0.95))].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
		#	if lB_n > 0:
		#		lB=np.median(scipy.chi2.ppf([1-x for x in results[pcol][((results[cfg['maccol']] >= 0.05) & (results[cfg['maccol']] < 0.1)) | ((results[cfg['maccol']] <= 0.95) & (results[cfg['maccol']] > 0.9))].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
		#	if lA_n > 0:
		#		lA=np.median(scipy.chi2.ppf([1-x for x in results[pcol][(results[cfg['maccol']] >= 0.1) & (results[cfg['maccol']] <= 0.9)].tolist()], df=1))/scipy.chi2.ppf(0.5,1)
		#	print "   genomic inflation (MAF >= 10%, n=" + str(lA_n) + ") = " + str(lA)
		#	print "   genomic inflation (5% <= MAF < 10%, n=" + str(lB_n) + ") = " + str(lB)
		#	print "   genomic inflation (3% <= MAF < 5%, n=" + str(lC_n) + ") = " + str(lC)
		#	print "   genomic inflation (1% <= MAF < 3%, n=" + str(lD_n) + ") = " + str(lD)
		#	print "   genomic inflation (MAF < 1%, n=" + str(lE_n) + ") = " + str(lE)
        #
		#	a = np.array([])
		#	b = np.array([])
		#	c = np.array([])
		#	results.sort_values(by=['logp'], inplace=True)
		#	if len(results[results['UGA_MAC'] == 'E'].index) > 0:
		#		aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA_MAC'] == 'E'].index)) + ')'))
		#		aa.sort()
		#		bb = results['logp'][results['UGA_MAC'] == 'E']
		#		#bb.sort()
		#		cc = results['UGA_MAC'][results['UGA_MAC'] == 'E']
		#		a = np.append(a,aa)
		#		b = np.append(b,bb)
		#		c = np.append(c,cc)
		#		print "   minimum p-value (MAF < 1%): " + str(np.min(results[pcol][results['UGA_MAC'] == 'E']))
		#		print "   maximum -1*log10(p-value) (MAF < 1%): " + str(np.max(results['logp'][results['UGA_MAC'] == 'E']))
		#	if len(results[results['UGA_MAC'] == 'D'].index) > 0:
		#		aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA_MAC'] == 'D'].index)) + ')'))
		#		aa.sort()
		#		bb = results['logp'][results['UGA_MAC'] == 'D']
		#		#bb.sort()
		#		cc = results['UGA_MAC'][results['UGA_MAC'] == 'D']
		#		a = np.append(a,aa)
		#		b = np.append(b,bb)
		#		c = np.append(c,cc)
		#		print "   minimum p-value (1% <= MAF < 3%): " + str(np.min(results[pcol][results['UGA_MAC'] == 'D']))
		#		print "   maximum -1*log10(p-value) (1% <= MAF < 3%): " + str(np.max(results['logp'][results['UGA_MAC'] == 'D']))
		#	if len(results[results['UGA_MAC'] == 'C'].index) > 0:
		#		aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA_MAC'] == 'C'].index)) + ')'))
		#		aa.sort()
		#		bb = results['logp'][results['UGA_MAC'] == 'C']
		#		#bb.sort()
		#		cc = results['UGA_MAC'][results['UGA_MAC'] == 'C']
		#		a = np.append(a,aa)
		#		b = np.append(b,bb)
		#		c = np.append(c,cc)
		#		print "   minimum p-value (3% <= MAF < 5%): " + str(np.min(results[pcol][results['UGA_MAC'] == 'C']))
		#		print "   maximum -1*log10(p-value) (3% <= MAF < 5%): " + str(np.max(results['logp'][results['UGA_MAC'] == 'C']))
		#	if len(results[results['UGA_MAC'] == 'B'].index) > 0:
		#		aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA_MAC'] == 'B'].index)) + ')'))
		#		aa.sort()
		#		bb = results['logp'][results['UGA_MAC'] == 'B']
		#		#bb.sort()
		#		cc = results['UGA_MAC'][results['UGA_MAC'] == 'B']
		#		a = np.append(a,aa)
		#		b = np.append(b,bb)
		#		c = np.append(c,cc)
		#		print "   minimum p-value (5% <= MAF < 10%): " + str(np.min(results[pcol][results['UGA_MAC'] == 'B']))
		#		print "   maximum -1*log10(p-value) (5% <= MAF < 10%): " + str(np.max(results['logp'][results['UGA_MAC'] == 'B']))
		#	if len(results[results['UGA_MAC'] == 'A'].index) > 0:
		#		aa = -1 * np.log10(ro.r('ppoints(' + str(len(results[results['UGA_MAC'] == 'A'].index)) + ')'))
		#		aa.sort()
		#		bb = results['logp'][results['UGA_MAC'] == 'A']
		#		#bb.sort()
		#		cc = results['UGA_MAC'][results['UGA_MAC'] == 'A']
		#		a = np.append(a,aa)
		#		b = np.append(b,bb)
		#		c = np.append(c,cc)
		#		print "   minimum p-value (MAF >= 10%): " + str(np.min(results[pcol][results['UGA_MAC'] == 'A']))
		#		print "   maximum -1*log10(p-value) (MAF >= 10%): " + str(np.max(results['logp'][results['UGA_MAC'] == 'A']))
        #
		#	ro.globalenv['df'] = ro.DataFrame({'a': ro.FloatVector(a), 'b': ro.FloatVector(b), 'UGA_MAC': ro.StrVector(c)})
        #
		#	if cfg['ext'] == 'tiff':
		#		ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.qq_strat.tiff')
		#	elif cfg['ext'] == 'eps':
		#		ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.qq_strat.eps')
		#	else:
		#		ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.qq_strat.pdf')
		#	ro.r("""
		#		gp<-ggplot(df, aes_string(x='a',y='b')) +
		#			geom_point(aes_string(color='UGA_MAC'), size=2) +
		#			scale_colour_manual(values=c("E"="#a8ddb5", "D"="#7bccc4", "C"="#4eb3d3", "B"="#2b8cbe", "A"="#08589e"), labels=c("E"="MAF < 1%%","D"="1%% <= MAF < 3%%","C"="3%% <= MAF < 5%%","B"="5%% <= MAF < 10%%","A"="MAF >= 10%%")) +
		#			geom_abline(intercept=0, slope=1, alpha=0.5) + 
		#			scale_x_continuous(expression(Expected~~-log[10](italic(p)))) +
		#			scale_y_continuous(expression(Observed~~-log[10](italic(p)))) +
		#			theme_bw(base_size = 12) + 
		#			theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), legend.title = element_blank(), 
		#				legend.key.height = unit(0.1,"in"), legend.text = element_text(size=5), legend.key = element_blank(), legend.justification = c(0,1), 
		#				legend.position = c(0,1), panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
		#				panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.text = element_text(size=12))
		#		%s
		#		""" % (ggsave))
        #
		#	if np.max(results['logp']) > cfg['crop']:
		#		print "   generating cropped frequency stratified qq plot"
		#		ro.r('df$b[df$b > ' + str(cfg['crop']) + ']<-' + str(cfg['crop']))
		#		ro.r('df$shape<-0')
		#		ro.r('df$shape[df$b == ' + str(cfg['crop']) + ']<-1')
		#		if cfg['ext'] == 'tiff':
		#			ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.qq_strat.cropped.tiff')
		#		elif cfg['ext'] == 'eps':
		#			ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.qq_strat.cropped.eps')
		#		else:
		#			ggsave = 'ggsave(filename="%s",plot=gp,width=4,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.qq_strat.cropped.pdf')
		#		ro.r("""
		#			gp<-ggplot(df, aes_string(x='a',y='b')) +
		#				geom_point(aes(shape=factor(shape), color=UGA_MAC), size=2) +
		#				scale_colour_manual(values=c("E"="#a8ddb5", "D"="#7bccc4", "C"="#4eb3d3", "B"="#2b8cbe", "A"="#08589e"), labels=c("E"="MAF < 1%%","D"="1%% <= MAF < 3%%","C"="3%% <= MAF < 5%%","B"="5%% <= MAF < 10%%","A"="MAF >= 10%%")) +
		#				geom_abline(intercept=0, slope=1, alpha=0.5) + 
		#				scale_x_continuous(expression(Expected~~-log[10](italic(p)))) +
		#				scale_y_continuous(expression(Observed~~-log[10](italic(p)))) +
		#				theme_bw(base_size = 12) + 
		#				guides(shape=FALSE) + 
		#				theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), legend.title = element_blank(), 
		#					legend.key.height = unit(0.1,"in"), legend.text = element_text(size=5), legend.key = element_blank(), legend.justification = c(0,1), 
		#					legend.position = c(0,1), panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
		#					panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.text = element_text(size=12))
		#			%s
		#			""" % (ggsave))

		if cfg['mht']:
			print "   generating standard manhattan plot"
			print "   minimum p-value: " + str(np.min(results[pcol]))
			print "   maximum -1*log10(p-value): " + str(np.max(results['logp']))
			if cfg['gc'] and l > 1:
				print "   adjusting p-values for genomic inflation for p-value column " + pcol
				results[pcol]=2 * scipy.norm.cdf(-1 * np.abs(scipy.norm.ppf(0.5*results[pcol]) / math.sqrt(l)))
				results['logp'] = -1 * np.log10(results[pcol])
				print "   minimum post-gc adjustment p-value: " + str(np.min(results[pcol]))
				print "   maximum post-gc adjustment -1*log10(p-value): " + str(np.max(results['logp']))
			else:
				print "   skipping genomic inflation correction"

			print "   calculating genomic positions"
			results.sort_values(by=[cfg['chrcol'],cfg['bpcol']], inplace=True)
			ticks = []
			lastbase = 0
			results['gpos'] = 0
			nchr = len(list(np.unique(results[cfg['chrcol']].values)))
			chrs = np.unique(results[cfg['chrcol']].values)
			if cfg['color']:
				colours = ["#08306B","#41AB5D","#000000","#F16913","#3F007D","#EF3B2C","#08519C","#238B45","#252525","#D94801","#54278F","#CB181D","#2171B5","#006D2C","#525252","#A63603","#6A51A3","#A50F15","#4292C6","#00441B","#737373","#7F2704","#807DBA","#67000D"]
			else:
				colours = ["#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3","#08589e","#4eb3d3"]
			if nchr == 1:
				results['gpos'] = results[cfg['bpcol']]
				results['colours'] = "#08589e"
				if results['gpos'].max() - results['gpos'].min() <= 1000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 100 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 10000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 1000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 100000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 10000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 200000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 20000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 300000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 30000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 400000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 40000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 500000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 50000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 600000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 60000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 700000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 70000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 800000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 80000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 900000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 90000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 1000000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 100000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 10000000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 1000000 == 0]
				elif results['gpos'].max() - results['gpos'].min() <= 100000000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 10000000 == 0]
				elif results['gpos'].max() - results['gpos'].min() > 100000000:
					ticks = [x for x in range(results['gpos'].min(),results['gpos'].max()) if x % 25000000 == 0]
			else:
				results['colours'] = "#000000"
				for i in range(len(chrs)):
					print "      processed chromosome " + str(int(chrs[i]))
					if i == 0:
						results.loc[results[cfg['chrcol']] == chrs[i],'gpos'] = results.loc[results[cfg['chrcol']] == chrs[i],cfg['bpcol']]
					else:
						lastbase = lastbase + results.loc[results[cfg['chrcol']] == chrs[i-1],cfg['bpcol']].iloc[-1]
						results.loc[results[cfg['chrcol']] == chrs[i],'gpos'] = (results.loc[results[cfg['chrcol']] == chrs[i],cfg['bpcol']]) + lastbase
					if results.loc[results[cfg['chrcol']] == chrs[i]].shape[0] > 1:
						ticks.append(results.loc[results[cfg['chrcol']] == chrs[i],'gpos'].iloc[0] + (results.loc[results[cfg['chrcol']] == chrs[i],'gpos'].iloc[-1] - results.loc[results[cfg['chrcol']] == chrs[i],'gpos'].iloc[0])/2)
					else:
						ticks.append(results.loc[results[cfg['chrcol']] == chrs[i],'gpos'].iloc[0])
					results.loc[results[cfg['chrcol']] == chrs[i],'colours'] = colours[int(chrs[i])]
			results['logp'] = -1 * np.log10(results[pcol])
			if results.shape[0] >= 1000000:
				sig = 5.4e-8
			else:
				sig = 0.05 / results.shape[0]
			print "   significance level set to p-value = " + str(sig) + " (-1*log10(p-value) = " + str(-1 * np.log10(sig)) + ")"
			print "   " + str(len(results[pcol][results[pcol] <= sig])) + " genome wide significant variants"
			chr = results[cfg['chrcol']].iloc[0]
			maxy=int(max(np.ceil(-1 * np.log10(sig)),np.ceil(results['logp'].max())))
			if maxy > 20:
				y_breaks = range(0,maxy,5)
				y_labels = range(0,maxy,5)
			else:
				y_breaks = range(0,maxy)
				y_labels = range(0,maxy)
			ro.globalenv['df'] = ro.DataFrame({'gpos': ro.FloatVector(results['gpos']), 'logp': ro.FloatVector(results['logp']), 'colours': ro.FactorVector(results['colours'])})
			ro.globalenv['ticks'] = ro.FloatVector(ticks)
			ro.globalenv['labels'] = ro.Vector(["{:,}".format(x/1000) for x in ticks])
			ro.globalenv['colours'] = ro.StrVector(colours)
			ro.globalenv['chrs'] = ro.FloatVector(chrs)

			print "   generating manhattan plot"
			if cfg['ext'] == 'tiff':
				ggsave = 'ggsave(filename="%s",plot=gp,width=16,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.mht.tiff')
			elif cfg['ext'] == 'eps':
				ggsave = 'ggsave(filename="%s",plot=gp,width=16,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.mht.eps')
			else:
				ggsave = 'ggsave(filename="%s",plot=gp,width=16,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.mht.pdf')
			if nchr == 1:
				ro.r("""
					gp<-ggplot(df, aes_string(x='gpos',y='logp')) +
						geom_hline(yintercept = -1 * log10(%g),colour="#B8860B", linetype=5, size = 0.25) + 
						geom_point(size=1.5) + 
						scale_x_continuous(expression(Chromosome~~%d~~(kb)),breaks=ticks,labels=labels) + 
						scale_y_continuous(expression(-log[10](italic(p))),breaks=seq(0,%d,1),limits=c(0,%d)) + 
						theme_bw(base_size = 8) + 
						theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), 
								panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
								panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.title = element_text(size=10), 
								axis.text = element_text(size=12), legend.position = 'none')
					%s
					""" % (sig, chr, maxy, maxy, ggsave))
			else:
				ro.r("""
					gp = ggplot(df, aes_string(x='gpos',y='logp',colour='colours')) + 
						geom_hline(yintercept = -1 * log10(%g),colour="#B8860B", linetype=5, size = 0.25) + 
						geom_point(size=1.5) + 
						scale_colour_manual(values=colours) + 
						scale_x_continuous(expression(Chromosome),breaks=ticks,labels=chrs) + 
						scale_y_continuous(expression(-log[10](italic(p))),breaks=seq(0,%d,1),limits=c(0,%d)) + 
						theme_bw(base_size = 8) + 
						theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), 
								panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
								panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.title = element_text(size=10), 
								axis.text = element_text(size=12), legend.position = 'none')
					%s
					""" % (sig, maxy, maxy, ggsave))

			if maxy > cfg['crop']:
				maxy = cfg['crop']
				ro.r('df$logp[df$logp > ' + str(cfg['crop']) + ']<-' + str(cfg['crop']))
				ro.r('df$shape<-0')
				ro.r('df$shape[df$logp == ' + str(cfg['crop']) + ']<-1')
				print "   generating cropped manhattan plot"
				if cfg['ext'] == 'tiff':
					ggsave = 'ggsave(filename="%s",plot=gp,width=16,height=4,units="in",bg="white",compression="lzw",dpi=300)' % (cfg['out'] + '.' + pcol + '.mht.cropped.tiff')
				elif cfg['ext'] == 'eps':
					ggsave = 'ggsave(filename="%s",plot=gp,width=16,height=4,bg="white",horizontal=True)' % (cfg['out'] + '.' + pcol + '.mht.cropped.eps')
				else:
					ggsave = 'ggsave(filename="%s",plot=gp,width=16,height=4,bg="white")' % (cfg['out'] + '.' + pcol + '.mht.cropped.pdf')
				if nchr == 1:
					ro.r("""
						gp<-ggplot(df, aes_string(x='gpos',y='logp')) +
							geom_hline(yintercept = -1 * log10(%g),colour="#B8860B", linetype=5, size = 0.25) + 
							geom_point(aes(shape=factor(shape)),size=1.5) + 
							scale_x_continuous(expression(Chromosome~~%d~~(kb)),breaks=ticks,labels=labels) + 
							scale_y_continuous(expression(-log[10](italic(p))),breaks=seq(0,%d,1),limits=c(0,%d)) + 
							theme_bw(base_size = 8) + 
							theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), 
									panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
									panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.title = element_text(size=10), 
									axis.text = element_text(size=12), legend.position = 'none')
						%s
						""" % (sig, chr, maxy, maxy, ggsave))
				else:
					ro.r("""
						gp = ggplot(df, aes_string(x='gpos',y='logp',colour='colours')) + 
							geom_hline(yintercept = -1 * log10(%g),colour="#B8860B", linetype=5, size = 0.25) + 
							geom_point(aes(shape=factor(shape)),size=1.5) + 
							scale_colour_manual(values=colours) + 
							scale_x_continuous(expression(Chromosome),breaks=ticks,labels=chrs) + 
							scale_y_continuous(expression(-log[10](italic(p))),breaks=seq(0,%d,1),limits=c(0,%d)) + 
							theme_bw(base_size = 8) + 
							theme(axis.title.x = element_text(vjust=-0.5,size=14), axis.title.y = element_text(vjust=1,angle=90,size=14), 
									panel.background = element_blank(), panel.border = element_blank(), panel.grid.minor = element_blank(), 
									panel.grid.major = element_blank(), axis.line = element_line(colour="black"), axis.title = element_text(size=8), 
									axis.text = element_text(size=12), legend.position = 'none')
						%s
						""" % (sig, maxy, maxy, ggsave))

	print "process complete"
	return 0
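
The Manhattan-plot block above shifts each chromosome's base-pair positions by the cumulative length of the chromosomes before it and places one axis tick at each chromosome's midpoint. A minimal sketch of just that "gpos" bookkeeping, on a toy table with assumed column names chr and pos, looks like this (illustrative only, not the uga code itself):

# Sketch of the cumulative genomic position ("gpos") calculation used for
# Manhattan plots; the chr/pos column names are assumptions for illustration.
import numpy as np
import pandas as pd

results = pd.DataFrame({'chr': [1, 1, 2, 2, 2, 3],
                        'pos': [100, 900, 50, 400, 800, 300]})
results.sort_values(by=['chr', 'pos'], inplace=True)

results['gpos'] = 0
ticks = []
lastbase = 0
prev_max = 0
for i, c in enumerate(np.unique(results['chr'].values)):
    mask = results['chr'] == c
    if i > 0:
        # shift this chromosome to start where the previous one ended
        lastbase += prev_max
    results.loc[mask, 'gpos'] = results.loc[mask, 'pos'] + lastbase
    prev_max = results.loc[mask, 'pos'].iloc[-1]
    # one tick at the midpoint of each chromosome for axis labelling
    first = results.loc[mask, 'gpos'].iloc[0]
    last = results.loc[mask, 'gpos'].iloc[-1]
    ticks.append(first + (last - first) / 2)

print(results[['chr', 'pos', 'gpos']])
print(ticks)
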
Example #48
0
def main():
    print("Test")
    Parse.run_command("bottest.js", "console.log(\"Ran bottest.js\")")
def num_invalid_valuations(formula):
    all_val = all_valuations(atoms(Parse.parse(formula)))
    return len(all_val) - num_valid_valuations(formula)
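
Example #48 counts invalid valuations as the difference between all valuations over the formula's atoms and the valid ones. The helpers all_valuations, atoms and num_valid_valuations are not shown here; the function below is only an illustrative guess at the enumeration they imply (2**n assignments over n atoms), not the project's actual API.

# Illustrative sketch: enumerate every truth assignment over a set of atoms.
import itertools

def all_valuations(atom_names):
    """Return one dict per truth assignment, e.g. {'p': True, 'q': False}."""
    atom_names = sorted(atom_names)
    return [dict(zip(atom_names, bits))
            for bits in itertools.product([False, True], repeat=len(atom_names))]

print(len(all_valuations({'p', 'q'})))  # 4 valuations for 2 atoms
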
Example #50
0
 def Mpi_slave(self, result_out, buffer_size, compair):
     """
     Slave process
     @param compair: Comparison object
     """
     ar=Parse()
     status = MPI.Status()
     self.comm.send(1,dest=0, tag=1)
     root=Interaction()
     buffer=buffer_size
     flag_soft=True
     part=0
     while(status.tag!=0):
         #Sleep to reduce resource consumption while waiting for a message
         while not self.comm.Iprobe(source=0, tag=MPI.ANY_TAG):
             time.sleep(0.1)
         data = self.comm.recv(source=0, tag=MPI.ANY_TAG,status=status)
         if(status.tag==1):
             #Create the software node
             if(flag_soft):
                 #counter and software name
                 root.setSoft(data[1],data[4])
                 flag_soft=False
             #Run the comparison
             result = compair.runComparison(data[0])
             #Parse the output
             ar.runParsing(data[1], data[2], data[3], data[5], data[6], result, root)                
             #Send back the result of the command line
             self.comm.send(1,dest=status.source, tag=1)
             #Decrease the buffer
             buffer-=1
             #Empty the buffer
             if(buffer==0):
                 #Send the results
                 #self.comm.send(zlib.compress(root.getResult()), dest=1, tag=2)
                 self.Mpi_write_data(root, result_out, data[1], data[4], self.myrank, part)
                 #Empty the buffer
                 del(root)
                 #Reset the tree
                 root=Interaction()
                 #Reset the buffer
                 buffer=buffer_size
                 #Reset the software flag
                 flag_soft=True
                 #Increment the part number
                 part+=1
                 
         #Change of software
         elif(status.tag==2):
             #Send the results
             #self.comm.send(zlib.compress(root.getResult()), dest=1, tag=2)
             #Write data
             if(buffer!=buffer_size):
                 self.Mpi_write_data(root, result_out, data[0], data[1], self.myrank, part)
             #Reset the buffer
             buffer=buffer_size
             #Empty the buffer
             del(root)
             #Reset the tree
             root=Interaction()
             #Reset the software flag
             flag_soft=True
             #Reset the part number
             part=0
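
The slave above is driven entirely by MPI message tags: tag 1 carries a work item, tag 2 asks it to flush the current software's results, and tag 0 ends the loop. The master side is not part of the snippet; the dispatcher below is only a rough sketch of how the tag-1/tag-0 half of that handshake could look with mpi4py (the per-software tag-2 flush is omitted for brevity).

# Hypothetical master-side dispatcher for the tag protocol used by Mpi_slave:
# answer every "ready" message (tag=1) with either a work item or a stop (tag=0).
from mpi4py import MPI

def mpi_master(work_items):
    comm = MPI.COMM_WORLD
    status = MPI.Status()
    pending = list(work_items)
    active = comm.Get_size() - 1          # every other rank runs the slave loop
    while active > 0:
        comm.recv(source=MPI.ANY_SOURCE, tag=1, status=status)
        if pending:
            comm.send(pending.pop(0), dest=status.source, tag=1)
        else:
            comm.send(None, dest=status.source, tag=0)
            active -= 1
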
Example #51
0
File: CNF.py Project: mmccarty/nell
       if c == NOT :
           opposite = prim[1:]
       else :
           opposite = NOT + prim
       if opposite in d :
           return True
    return False


if __name__ == '__main__':
#def main():
    """main lets you test the algorithm with interactive input"""
    import Parse
    text = raw_input("Type a proposition: ")
    print
    prop0 = Parse.parse(Parse.scan(text))
    print "parse tree: "
    print prop0
    print
    prop1 = removeImplications(prop0)
    print "-> removed:"
    print prop1
    print
    prop2 = moveNegations(prop1)
    print "- shifted inwards:"
    print prop2
    print
    prop3 = makeIntoCNF(prop2)
    print "cnf:"
    print  prop3
    print
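
The driver above pushes the parse tree through removeImplications, moveNegations and makeIntoCNF in turn. Those functions are defined elsewhere in CNF.py; as a self-contained illustration of the first step only, the sketch below rewrites implications on a nested-list tree of an assumed shape ['->', p, q], which may not match the project's real node format.

# Illustrative sketch of the first CNF step: rewrite (p -> q) as (~p v q).
IMPLIES, OR, NOT = '->', 'v', '~'

def remove_implications(tree):
    if isinstance(tree, str):               # a primitive proposition
        return tree
    op, args = tree[0], [remove_implications(t) for t in tree[1:]]
    if op == IMPLIES:
        left, right = args
        return [OR, [NOT, left], right]
    return [op] + args

print(remove_implications(['->', 'p', ['->', 'q', 'r']]))
# ['v', ['~', 'p'], ['v', ['~', 'q'], 'r']]
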
Example #52
0
def main(args=None):
	rerun = []
	args = Parse.get_args(Parse.get_parser())
	resubmit = False
	if args.which in ['snv','snvgroup','meta','merge','resubmit','tools']:
		if args.which == 'resubmit':
			with open(args.dir + '/' + os.path.basename(args.dir) + '.args.pkl', 'rb') as p:
				qsub = args.qsub if args.qsub else None
				args,cfg = pickle.load(p)
				if qsub:
					cfg['qsub'] = qsub
			with open(cfg['out'] + '/' + os.path.basename(cfg['out']) + '.rerun', 'r') as f:
				rerun = [int(line.rstrip()) for line in f]
			cfg['replace'] = True
			resubmit = True
		else:
			cfg = getattr(Parse, 'generate_' + args.which + '_cfg')(args.ordered_args)
	elif args.which != 'settings':
		cfg = getattr(Parse, 'generate_' + args.which + '_cfg')(args.ordered_args)

	##### read settings file #####
	ini = SafeConfigParser()
	ini.read(resource_filename('uga', 'settings.ini'))

	##### locate qsub wrapper #####
	qsub_wrapper = ini.get('main','wrapper')
	if 'qsub' in args and not os.access(ini.get('main','wrapper'),os.X_OK):
		print Process.print_error('uga qsub wrapper ' + ini.get('main','wrapper') + ' is not executable')
		return

	##### distribute jobs #####
	if args.which in ['snv','snvgroup','meta','merge','tools']:
		run_type = 0
		if cfg['cpus'] is not None and cfg['cpus'] > 1:
			run_type = run_type + 1
		if cfg['split'] and cfg['qsub'] is not None:
			run_type = run_type + 10
		if cfg['split_n'] and cfg['qsub'] is not None:
			run_type = run_type + 100
			
		if resubmit:
			jobs_df = pd.read_table(cfg['out'] + '/' + cfg['out'] + '.jobs')
		else:
			if args.which in ['snv','tools']:
				#	generate regions dataframe with M rows, either from --snv-map or by splitting data file or --snv-region according to --mb
				#	run_type = 0:   run as single job
				#	run_type = 1:   --cpus C (distribute M regions over C cpus and run single job, 1 job C cpus)
				#	run_type = 10:  --split (split M regions into single region jobs, M jobs 1 cpu)
				#	run_type = 100: --split-n N (distribute M regions over N jobs, N jobs 1 cpu)
				#	run_type = 11:  --split, --cpus C (split M regions into chunks of size M / C and run M jobs, M jobs C cpus)
				#	run_type = 101: --split-n N, --cpus C (distribute M regions over N jobs and distribute each over C cpus, N jobs C cpus)

				if cfg['region_file']:
					jobs_df = pd.read_table(cfg['region_file'],header=None,names=['region'], compression='gzip' if cfg['region_file'].split('.')[-1] == 'gz' else None)
					jobs_df['chr'] = [x.split(':')[0] for x in jobs_df['region']]
					jobs_df['chr_idx'] = [int(x.split(':')[0].replace('X','23').replace('Y','24')) for x in jobs_df['region']]
					jobs_df['start'] = [int(x.split(':')[1].split('-')[0]) for x in jobs_df['region']]
					jobs_df['end'] = [int(x.split(':')[1].split('-')[1]) for x in jobs_df['region']]
					jobs_df['job'] = 1
					jobs_df['cpu'] = 1
				else:
					snv_map = []
					data_files = []
					if args.which == 'snv':
						for m in cfg['models']:
							if cfg['models'][m]['file'] not in data_files:
								snv_map.extend(Map.map(file=cfg['models'][m]['file'], mb = cfg['mb'], region = cfg['region']))
								data_files.append(cfg['models'][m]['file'])
					else:
						snv_map.extend(Map.map(file=cfg['file'], mb = cfg['mb'], region = cfg['region']))
					snv_map = list(set(snv_map))
					jobs_df = pd.DataFrame({'region': snv_map, 'chr': [x.split(':')[0] for x in snv_map], 'chr_idx': [int(x.split(':')[0].replace('X','23').replace('Y','24')) for x in snv_map], 'start': [int(x.split(':')[1].split('-')[0]) for x in snv_map], 'end': [int(x.split(':')[1].split('-')[1]) for x in snv_map]})
					jobs_df['job'] = 1
					jobs_df['cpu'] = 1
					del data_files
					del snv_map
				jobs_df.sort_values(by=['chr_idx','start'],inplace=True)
				jobs_df = jobs_df[['chr','start','end','region','job','cpu']]
				jobs_df.reset_index(drop=True,inplace=True)
			if args.which in ['meta','merge']:
				#	generate regions dataframe with M rows, either from --snv-map or by splitting data file or --snv-region according to --mb
				#	run_type = 0:   run as single job
				#	run_type = 1:   --cpus C (distribute M regions over C cpus and run single job, 1 job C cpus)
				#	run_type = 10:  --split (split M regions into single region jobs, M jobs 1 cpu)
				#	run_type = 100: --split-n N (distribute M regions over N jobs, N jobs 1 cpu)
				#	run_type = 11:  --split, --cpus C (split M regions into chunks of size M / C and run M jobs, M jobs C cpus)
				#	run_type = 101: --split-n N, --cpus C (distribute M regions over N jobs and distribute each over C cpus, N jobs C cpus)
				if cfg['region_file']:
					jobs_df = pd.read_table(cfg['region_file'],header=None,names=['region'], compression='gzip' if cfg['region_file'].split('.')[-1] == 'gz' else None)
					jobs_df['chr'] = [int(x.split(':')[0]) for x in jobs_df['region']]
					jobs_df['start'] = [int(x.split(':')[1].split('-')[0]) for x in jobs_df['region']]
					jobs_df['end'] = [int(x.split(':')[1].split('-')[1]) for x in jobs_df['region']]
					jobs_df['job'] = 1
					jobs_df['cpu'] = 1
				else:
					snv_map = []
					data_files = []
					for f in cfg['files']:
						if f not in data_files:
							snv_map.extend(Map.map(file=cfg['files'][f], mb = cfg['mb'], region = cfg['region']))
							data_files.append(cfg['files'][f])
					snv_map = list(set(snv_map))
					jobs_df = pd.DataFrame({'region': snv_map, 'chr': [int(x.split(':')[0]) for x in snv_map], 'start': [int(x.split(':')[1].split('-')[0]) for x in snv_map], 'end': [int(x.split(':')[1].split('-')[1]) for x in snv_map]})
					jobs_df['job'] = 1
					jobs_df['cpu'] = 1
					del data_files
					del snv_map
				jobs_df = jobs_df[['chr','start','end','region','job','cpu']]
				jobs_df.sort_values(by=['chr','start'],inplace=True)
				jobs_df.reset_index(drop=True,inplace=True)

			if args.which == 'snvgroup':
				#	generate regions dataframe with M rows from --snvgroup-map
				#	run_type = 0:   run as single job
				#	run_type = 1:   --cpus C (distribute M snvgroups over C cpus and run single job, 1 job C cpus)
				#	run_type = 10:  --split (split M snvgroups into single region jobs, M jobs 1 cpu)
				#	run_type = 100: --split-n N (distribute M snvgroups over N jobs, N jobs 1 cpu)
				#	run_type = 101: --split-n N, --cpus C (distribute M snvgroups over N jobs and distribute each job over C cpus, N jobs C cpus)

				if cfg['region_file']:
					jobs_df = pd.read_table(cfg['region_file'],header=None,names=['region','group_id'], compression='gzip' if cfg['region_file'].split('.')[-1] == 'gz' else None)
					jobs_df['chr'] = [int(x.split(':')[0]) for x in jobs_df['region']]
					jobs_df['chr_idx'] = 1
					jobs_df['start'] = [int(x.split(':')[1].split('-')[0]) for x in jobs_df['region']]
					jobs_df['end'] = [int(x.split(':')[1].split('-')[1]) for x in jobs_df['region']]
					jobs_df['job'] = 1
					jobs_df['cpu'] = 1
					jobs_df = jobs_df[['chr','start','end','region','group_id','job','cpu']]
					jobs_df.sort_values(by=['chr','start'],inplace=True)
					jobs_df.reset_index(drop=True,inplace=True)
				elif cfg['region']:
					snv_map = []
					data_files = []
					for m in cfg['models']:
						if cfg['models'][m]['file'] not in data_files:
							snv_map.extend(Map.map(file=cfg['models'][m]['file'], mb = 1000, region = cfg['region']))
							data_files.append(cfg['models'][m]['file'])
					snv_map = list(set(snv_map))
					jobs_df = pd.DataFrame({'region': snv_map, 'chr': [int(x.split(':')[0]) for x in snv_map], 'start': [int(x.split(':')[1].split('-')[0]) for x in snv_map], 'end': [int(x.split(':')[1].split('-')[1]) for x in snv_map]})
					jobs_df['group_id'] = cfg['region']
					jobs_df['job'] = 1
					jobs_df['cpu'] = 1
					del data_files
					del snv_map
					jobs_df = jobs_df[['chr','start','end','region','group_id','job','cpu']]
					jobs_df.sort_values(by=['chr','start'],inplace=True)
					jobs_df.reset_index(drop=True,inplace=True)
				else:
					if cfg['snvgroup_map']:
						snvgroup_map = pd.read_table(cfg['snvgroup_map'],header=None,names=['chr','pos','marker','group_id'], compression='gzip' if cfg['snvgroup_map'].split('.')[-1] == 'gz' else None)
						jobs_df = snvgroup_map[['chr','pos','group_id']]
						jobs_df=jobs_df.groupby(['chr','group_id'])
						jobs_df = jobs_df.agg({'pos': [np.min,np.max]})
						jobs_df.columns = ['start','end']
						jobs_df['chr'] = jobs_df.index.get_level_values('chr')
						jobs_df['group_id'] = jobs_df.index.get_level_values('group_id')
						jobs_df['region'] = jobs_df.chr.map(str) + ':' + jobs_df.start.map(str) + '-' + jobs_df.end.map(str)
						jobs_df['job'] = 1
						jobs_df['cpu'] = 1
						jobs_df = jobs_df[['chr','start','end','region','group_id','job','cpu']]
						jobs_df.drop_duplicates(inplace=True)
						jobs_df.sort_values(by=['chr','start'],inplace=True)
						jobs_df.reset_index(drop=True,inplace=True)

			if jobs_df.empty:
				print Process.print_error('job list is empty, no variants found in region/s specified')
				return
			if run_type == 1:
				n = int(np.ceil(jobs_df.shape[0] / float(cfg['cpus'])))
				n_remain = int(jobs_df.shape[0] - (n-1) * cfg['cpus'])
				jobs_df['cpu'] = np.append(np.repeat(range(cfg['cpus'])[:n_remain],n),np.repeat(range(cfg['cpus'])[n_remain:],n-1)).astype(np.int64) + 1
			elif run_type == 10:
				jobs_df['job'] = jobs_df.index.values + 1
			elif run_type == 100:
				n = int(np.ceil(jobs_df.shape[0] / float(cfg['split_n'])))
				n_remain = int(jobs_df.shape[0] - (n-1) * cfg['split_n'])
				jobs_df['job'] = np.append(np.repeat(range(cfg['split_n'])[:n_remain],n),np.repeat(range(cfg['split_n'])[n_remain:],n-1)).astype(np.int64) + 1
			elif run_type == 11 and args.which != 'snvgroup':
				cfg['split_n'] = int(np.ceil(jobs_df.shape[0] / float(cfg['cpus'])))
				n = int(np.ceil(jobs_df.shape[0] / float(cfg['split_n'])))
				n_remain = int(jobs_df.shape[0] - (n-1) * cfg['split_n'])
				jobs_df['job'] = np.append(np.repeat(range(cfg['split_n'])[:n_remain],n),np.repeat(range(cfg['split_n'])[n_remain:],n-1)).astype(np.int64) + 1
				for i in range(1,int(max(jobs_df['job'])) + 1):
					n = int(np.ceil(jobs_df[jobs_df['job'] == i].shape[0] / float(cfg['cpus'])))
					n_remain = int(jobs_df[jobs_df['job'] == i].shape[0] - (n-1) * cfg['cpus'])
					jobs_df.loc[jobs_df['job'] == i,'cpu'] = np.append(np.repeat(range(cfg['cpus'])[:n_remain],n),np.repeat(range(cfg['cpus'])[n_remain:],n-1)).astype(np.int64) + 1
				cfg['split'] = None
			elif run_type == 101:
				n = int(np.ceil(jobs_df.shape[0] / float(cfg['split_n'])))
				n_remain = int(jobs_df.shape[0] - (n-1) * cfg['split_n'])
				jobs_df['job'] = np.append(np.repeat(range(cfg['split_n'])[:n_remain],n),np.repeat(range(cfg['split_n'])[n_remain:],n-1)).astype(np.int64) + 1
				for i in range(1,int(max(jobs_df['job'])) + 1):
					n = int(np.ceil(jobs_df[jobs_df['job'] == i].shape[0] / float(cfg['cpus'])))
					n_remain = int(jobs_df[jobs_df['job'] == i].shape[0] - (n-1) * cfg['cpus'])
					jobs_df.loc[jobs_df['job'] == i,'cpu'] = np.append(np.repeat(range(cfg['cpus'])[:n_remain],n),np.repeat(range(cfg['cpus'])[n_remain:],n-1)).astype(np.int64) + 1
			if int(max(jobs_df['job'])) + 1 > 100000:
				print Process.print_error('number of jobs exceeds 100,000, consider using --split-n to reduce the total number of jobs')
				return
			

	if args.which in ['snv','snvgroup','meta','merge','tools']:
		print 'detected run type ' + str(run_type) + ' ...'
		if len(rerun) == 0:
			if int(max(jobs_df['job'])) > 1 and cfg['qsub'] is not None:
				if 'mb' in cfg:
					print '   ' + str(jobs_df.shape[0]) + ' regions of size ' + str(cfg['mb']) + 'mb detected'
				else:
					print '   ' + str(jobs_df.shape[0]) + ' regions detected'
				print '   an array containing ' + str(int(max(jobs_df['job']))) + ' tasks will be submitted'
				print '   <= ' + str(max(np.bincount(jobs_df['job']))) + ' regions per task'
				print '   <= '  + str(int(max(jobs_df['cpu']))) + ' cpus per task'
				print '   qsub options: ' + cfg['qsub']
				print '   output directory: ' + cfg['out']
				print '   replace: ' + str(cfg['replace'])
				input_var = None
				while input_var not in ['y','n','Y','N']:
					input_var = raw_input('\nsubmit jobs (yY/nN)? ')
				if input_var.lower() == 'n':
					print 'canceled by user'
					return

			if os.path.exists(cfg['out']):
				if args.replace:
					print 'deleting old data'
					try:
						shutil.rmtree(cfg['out'])
					except OSError:
						print Process.print_error('unable to replace results directory ' + cfg['out'])
				else:
					print Process.print_error('results directory ' + cfg['out'] + ' already exists, use --replace to overwrite existing results')
					return
			try:
				os.mkdir(cfg['out'])
			except OSError:
				pass

			with open(cfg['out'] + '/' + os.path.basename(cfg['out']) + '.args.pkl', 'wb') as p:
				pickle.dump([args, cfg], p)

			if run_type in [10,11,100,101] and jobs_df.shape[0] > 1:
				print "initializing job array database ..."
				try:
					os.mkdir(cfg['out'] + '/temp')
				except OSError:
					pass
				for j in range(1, int(max(jobs_df['job'])) + 1):
					try:
						os.mkdir(cfg['out'] + '/jobs' + str(100 * ((j-1) / 100) + 1) + '-' + str(100 * ((j-1) / 100) + 100))
					except OSError:
						pass
					try:
						os.mkdir(cfg['out'] + '/jobs' + str(100 * ((j-1) / 100) + 1) + '-' + str(100 * ((j-1) / 100) + 100) + '/job' + str(j))
					except OSError:
						pass
				with open(cfg['out'] + '/' + cfg['out'] + '.files', 'w') as jlist:
					for j in range(1, int(max(jobs_df['job'])) + 1):
						if args.which in ['snv','snvgroup','tools','merge']:
							if 'model_order' in cfg:
								for m in cfg['model_order']:
									if m != '___no_tag___':
										jlist.write(str(j) + '\t' + cfg['out'] + '.' + m + '.gz' + '\t' + cfg['out'] + '/jobs' + str(100 * ((j-1) / 100) + 1) + '-' + str(100 * ((j-1) / 100) + 100) + '/job' + str(j) + '/' + cfg['out'] + '.job' + str(j) + '.' + m + '.gz\n')
									else:
										jlist.write(str(j) + '\t' + cfg['out'] + '.gz' + '\t' + cfg['out'] + '/jobs' + str(100 * ((j-1) / 100) + 1) + '-' + str(100 * ((j-1) / 100) + 100) + '/job' + str(j) + '/' + cfg['out'] + '.job' + str(j) + '.gz\n')
							else:								
								jlist.write(str(j) + '\t' + cfg['out'] + '.gz' + '\t' + cfg['out'] + '/jobs' + str(100 * ((j-1) / 100) + 1) + '-' + str(100 * ((j-1) / 100) + 100) + '/job' + str(j) + '/' + cfg['out'] + '.job' + str(j) + '.gz\n')
						if 'meta_order' in cfg:
							if len(cfg['meta_order']) > 0:
								for m in cfg['meta_order']:
									jlist.write(str(j) + '\t' + cfg['out'] + '.' + m + '.gz' + '\t' + cfg['out'] + '/jobs' + str(100 * ((j-1) / 100) + 1) + '-' + str(100 * ((j-1) / 100) + 100) + '/job' + str(j) + '/' + cfg['out'] + '.job' + str(j) + '.' + m + '.gz\n')
			jobs_df.to_csv(cfg['out'] + '/' + cfg['out'] + '.jobs',header=True,index=False,sep="\t")
			with open(cfg['out'] + '/' + cfg['out'] + '.jobs.run','w') as f:
				f.write("\n".join([str(x) for x in jobs_df['job'].unique()]))
		else:
			if len(rerun) > 0 and cfg['qsub'] is not None:
				print 'detected resubmit ...'
				print '   an array containing ' + str(len(rerun)) + ' tasks will be submitted'
				print '   <= ' + str(max(np.bincount(jobs_df['job']))) + ' regions per job'
				print '   <= '  + str(int(max(jobs_df['cpu']))) + ' cpus per job'
				print '   qsub options: ' + cfg['qsub']
				print '   output directory: ' + cfg['out']
				print '   replace: ' + str(cfg['replace'])
				input_var = None
				while input_var not in ['y','n','Y','N']:
					input_var = raw_input('\nresubmit jobs (yY/nN)? ')
				if input_var.lower() == 'n':
					print 'canceled by user'
					return
			with open(cfg['out'] + '/' + cfg['out'] + '.jobs.run','w') as f:
				f.write("\n".join([str(x) for x in jobs_df['job'][jobs_df['job'].isin(rerun)]]))
			os.remove(cfg['out'] + '/' + os.path.basename(cfg['out']) + '.rerun')

	if args.which == 'settings':
		if 'ordered_args' in args:
			for k in args.ordered_args:
				ini.set('main',k[0],k[1])
			with open(resource_filename('uga', 'settings.ini'), 'w') as f:
				ini.write(f)
		print 'main settings ...'
		for s in ini.sections():
			for k in ini.options(s):
				print '   ' + k + ' = ' + ini.get(s,k)

	elif args.which in ['snv','snvgroup','meta','merge','resubmit','tools']:
		if cfg['qsub']:
			print "submitting jobs\n"
		out = cfg['out']
		joblist = range(1, int(max(jobs_df['job'])) + 1) if len(rerun) == 0 else rerun
		if int(max(jobs_df['job'])) > 1:
			cfg['out'] = out + '/jobsUGA_JOB_RANGE/jobUGA_JOB_ID/' + os.path.basename(out) + '.jobUGA_JOB_ID'
			cfg['job'] = 'UGA_JOB_ID'
			if cfg['qsub']:
				cfg['qsub'] = cfg['qsub'] + ' -t 1-' + str(len(joblist))
		else:
			cfg['out'] = out + '/' + os.path.basename(out)
			cfg['job'] = 1
			if cfg['qsub']:
				cfg['qsub'] = cfg['qsub'] + ' -t 1'
		args.ordered_args = [('out',cfg['out']),('region_file',out + '/' + out + '.jobs'),('job',cfg['job']),('cpus',int(max(jobs_df['cpu'])))] + [x for x in args.ordered_args if x[0] not in ['out','region_file','cpus']]
		cmd = 'Run' + args.which.capitalize() + '(' + str(args.ordered_args) + ')'
		if cfg['qsub']:
			Process.qsub(['qsub'] + cfg['qsub'].split() + ['-N',out,'-o',out + '/temp',qsub_wrapper],'\"' + cmd + '\"',out + '/' + out + '.jobs.run',cfg['out'] + '.log')
		else:
			Process.interactive(qsub_wrapper, cmd, cfg['out'] + '.' + args.which + '.log')

	elif args.which == 'compile':
		files = pd.read_table(args.dir + '/' + os.path.basename(args.dir) + '.files', names=['job','out','file'])
		complete, rerun = Fxns.verify_results(args.dir,files)
		if len(rerun) > 0:
			print Process.print_error('detected ' + str(len(rerun)) + ' failed jobs\n       use resubmit module to rerun failed jobs')
			with open(args.dir + '/' + os.path.basename(args.dir) + '.rerun', 'w') as f:
				f.write("\n".join([str(x) for x in rerun]))
		else:
			complete = Fxns.compile_results(args.dir,files)
			if complete:
				input_var = None
				while input_var not in ['y','n','Y','N']:
					input_var = raw_input('delete obsolete job subdirectories and files for this project (yY/nN)? ')
				if input_var.lower() == 'n':
					print 'canceled by user'
				else:
					print 'deleting subdirectories'
					for d in glob.glob(args.dir + '/jobs*-*'):
						try:
							shutil.rmtree(d)
						except OSError:
							print Process.print_error('unable to delete job data directory ' + d)
					print 'deleting temporary directory'
					try:
						shutil.rmtree(args.dir + '/temp')
					except OSError:
						print Process.print_error('unable to delete temporary directory ' + args.dir + '/temp')
					print "deleting last job run list"
					try:
						os.remove(args.dir + '/' + os.path.basename(args.dir) + '.jobs.run')
					except OSError:
						print Process.print_error('unable to delete job run list ' + args.dir + '/' + os.path.basename(args.dir) + '.jobs.run')
			else:
				print Process.print_error('file compilation incomplete')

	elif args.which in ['snvgroupplot','snvplot']:
		cfg['out'] = '.'.join(cfg['file'].split('.')[0:len(cfg['file'].split('.'))-1]) + '.' + args.which
		args.ordered_args = [('out',cfg['out'])] + [x for x in args.ordered_args if x[0] not in ['out']]
		cmd = 'Run' + args.which.capitalize() + '(' + str(args.ordered_args) + ')'
		if cfg['qsub'] is not None:
			Process.qsub(['qsub'] + cfg['qsub'].split() + ['-o',cfg['out'] + '.log',qsub_wrapper],'\"' + cmd + '\"')
		else:
			Process.interactive(qsub_wrapper, cmd, cfg['out'] + '.log')

	elif args.which == 'filter':
		if os.path.exists(cfg['file'].replace('.gz','.' + cfg['tag'] + '.log')):
			if args.replace:
				try:
					os.remove(cfg['file'].replace('.gz','.' + cfg['tag'] + '.log'))
				except OSError:
					print Process.print_error('unable to remove existing log file ' + cfg['file'].replace('.gz','.' + cfg['tag'] + '.log'))
					return
			else:
				print Process.print_error('log file ' + cfg['file'].replace('.gz','.' + cfg['tag'] + '.log') + ' already exists, use --replace to overwrite existing results')
				return
		if os.path.exists(cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz')):
			if args.replace:
				try:
					os.remove(cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz'))
				except OSError:
					print Process.print_error('unable to remove existing inflation corrected results file ' + cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz'))
			else:
				print Process.print_error('results file ' + cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz') + ' already exists, use --replace to overwrite existing results')
				return
		if os.path.exists(cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz.tbi')):
			if args.replace:
				try:
					os.remove(cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz.tbi'))
				except OSError:
					print Process.print_error('unable to remove existing inflation corrected results index file ' + cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz.tbi'))
			else:
				print Process.print_error('results index file ' + cfg['file'].replace('.gz','.' + cfg['tag'] + '.gz.tbi') + ' already exists, use --replace to overwrite existing results')
				return
		cmd = 'Run' + args.which.capitalize() + '(' + str(args.ordered_args) + ')'
		if cfg['qsub'] is not None:
			Process.qsub(['qsub'] + cfg['qsub'].split() + ['-o',cfg['file'].replace('.gz','.' + cfg['tag'] + '.log'),qsub_wrapper],'\"' + cmd + '\"')
		else:
			Process.interactive(qsub_wrapper, cmd, cfg['file'].replace('.gz','.' + cfg['tag'] + '.log'))
	else:
		print Process.print_error(args.which + " not a currently available module")

	print ''
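
The comment blocks above encode the --cpus/--split/--split-n switches as run_type 1, 10 and 100 (and their sums) and then spread the M regions over jobs and cpus with np.repeat so that the chunks differ in size by at most one region. The sketch below isolates that assignment step on a toy jobs table, mirroring the formula used for run_type 100 above.

# Spread M regions over N jobs as evenly as possible (the np.repeat pattern above).
import numpy as np
import pandas as pd

def assign_jobs(n_regions, n_jobs):
    n = int(np.ceil(n_regions / float(n_jobs)))         # regions in a "full" job
    n_remain = int(n_regions - (n - 1) * n_jobs)         # how many jobs are full
    return np.append(np.repeat(range(n_jobs)[:n_remain], n),
                     np.repeat(range(n_jobs)[n_remain:], n - 1)).astype(np.int64) + 1

jobs_df = pd.DataFrame({'region': ['chr1:1-100', 'chr1:101-200', 'chr2:1-100',
                                   'chr2:101-200', 'chr3:1-100']})
jobs_df['job'] = assign_jobs(jobs_df.shape[0], 2)
print(jobs_df)   # 3 regions land in job 1, 2 regions in job 2
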
Example #53
0
def proveUniversal(C6, bfactlist, bgoal) :
    """ Tries to prove a bgoal of form,   ["forall", lo, i, hi, bprop_i_]
        from  bfactlist.   First, attempts to show  hi <= lo,  meaning
        quantification ranges over empty set.   If this fails, tries
        to establish numerical lower and upper bounds for the quantification
        and enumerate proofs for all elements within these bounds.
        If this fails, then searches
        for a  bfact in bfactlist that is a forall of the same form,
        but where its upper bound is  hi-1.   If success, then tries
        to prove  bprop_hi-1_.

        If I have the energy, I'll try to make this smarter later....
    """
    #print "proveUNIVERSAL: bfactlist=", bfactlist
    #print "goal=", bgoal
    lo = bgoal[1]
    hi = bgoal[3]
    i = bgoal[2]
    bprop = bgoal[4]

    # first, see if  bgoal in premises:
    success = bgoal in bfactlist

    if not success :
        # next, try to prove that domain is empty, ie, hi <= lo :
        success = verifyRelation(C6, bfactlist, ["<=", hi, lo])

    if not success:
        # next, try to establish numerical lower and upper bounds and
        # prove  bprop  for all elements in the numerical range:
        lonum = PE.evallToInt(C6, lo)
        hinum = PE.evallToInt(C6, hi)
        if isinstance(lonum, int)  and  isinstance(hinum, int):
            success = True  # so far, so good...
            for j in range(lonum, hinum):
                stree = Parse.substituteTree(["int", str(j)], i, bprop)
                success = success and proveSequent(C6, bfactlist, stree)

    if not success:
        # then search bfactlist for a forall goal whose body
        #  matches  bprop and whose bounds cover  bgoal's all but one:
        possibles = [ f for f in bfactlist
                      if f[0] == "forall" \
                         and Parse.substituteTree(i, f[2], f[4]) == bprop \
                         #and verifyRelation(C6, bfactlist, ["==", f[1], lo]) \
                         and verifyRelation(C6, bfactlist, ["<=", f[1], lo]) \
                         and verifyRelation(C6, bfactlist,  \
                                     ["==", ["+", f[3], ["int", "1"]], hi]) \
                    ]
        if len(possibles) > 0 :
            success = proveSequent(C6, bfactlist, 
                                   Parse.substituteTree \
                                       (["-", hi, ["int", "1"]], i, bprop) )

    if not success:
        #search bfactlist for a forall goal whose body
        #  matches  bprop and whose bounds cover  bgoal's:
        possibles = [ f for f in bfactlist
                      if f[0] == "forall" \
                         and Parse.substituteTree(i, f[2], f[4]) == bprop \
                         and verifyRelation(C6, bfactlist, ["<=", f[1], lo]) \
                         and verifyRelation(C6, bfactlist, [">=", f[3], hi]) \
                    ]
        success = (len(possibles) > 0)

    return success
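
proveUniversal leans on Parse.substituteTree to plug a concrete index (or the term hi-1) into the quantified body before recursing with proveSequent. The real substituteTree lives in Parse; the stand-in below only illustrates the idea on nested-list trees of the same general shape as the bgoal lists above.

# Rough stand-in for Parse.substituteTree: replace every occurrence of `var`
# in a nested-list tree with `replacement`.
def substitute_tree(replacement, var, tree):
    if tree == var:
        return replacement
    if isinstance(tree, list):
        return [substitute_tree(replacement, var, t) for t in tree]
    return tree

goal = [">=", ["index", "a", "i"], ["int", "0"]]     # roughly: a[i] >= 0
print(substitute_tree(["int", "2"], "i", goal))
# ['>=', ['index', 'a', ['int', '2']], ['int', '0']]
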
Example #54
0
import Parse as Parse
import csv

# Parse data from csv file

filename = 'Books/webpages.csv'
f = open(filename, 'rU')
f.seek(0)

fields = ['id', 'author_id', 'publisher_id', 'url', 'publication_series_id', 'title_id', 'award_type_id', 'title_series_id', 'award_category_id']
reader = csv.DictReader(f, dialect='excel-tab', fieldnames=fields)

data = []
for row in reader:

    author_id = Parse.nullize(row['author_id'])
    publisher_id = Parse.nullize(row['publisher_id'])
    publication_series_id = Parse.nullize(row['publication_series_id'])
    title_id = Parse.nullize(row['title_id'])
    award_type_id = Parse.nullize(row['award_type_id'])
    title_series_id = Parse.nullize(row['title_series_id'])
    award_category_id = Parse.nullize(row['award_category_id'])
    url = Parse.nullize(row['url'])

    data.append( (row['id'], author_id, publisher_id, publication_series_id, title_id, award_type_id, title_series_id, award_category_id, url) )


# Insert data into Database

# db = DB.Database('db4free.net','group8','toto123', 'cs322')
#
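
Parse.nullize is used throughout these import scripts but its body is not shown; presumably it turns empty or placeholder CSV fields into None so they load as SQL NULL. The helper below is a guess at that behaviour, for illustration only.

# Hypothetical sketch of a nullize-style helper; the real Parse.nullize is not
# shown, so the exact placeholder handling here is an assumption.
def nullize(value):
    """Return None (SQL NULL) for empty or placeholder CSV fields."""
    if value is None or value.strip() in ('', '\\N', 'NULL'):
        return None
    return value

print(nullize(''))       # None
print(nullize('\\N'))    # None
print(nullize('12345'))  # '12345'
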
Example #55
0
def RunSnvgroup(args):
	cfg = Parse.generate_snvgroup_cfg(args)
	Parse.print_snvgroup_options(cfg)

	if not cfg['debug']:
		logging.disable(logging.CRITICAL)

	regions_df = pd.read_table(cfg['region_file'], compression='gzip' if cfg['region_file'].split('.')[-1] == 'gz' else None)
	regions_df = regions_df[regions_df['job'] == int(cfg['job'])].reset_index(drop=True)
	return_values = {}
	models_out = {}
	bgzfiles = {}
	print ''
	for m in cfg['model_order']:
		print "initializing out file for model " + m
		models_out[m] = cfg['out'] if m == '___no_tag___' else cfg['out'] + '.' + m
		try:
			bgzfiles[m] = bgzf.BgzfWriter(models_out[m] + '.gz', 'wb')
		except:
			print Process.Error("failed to initialize bgzip format out file " + models_out[m] + '.gz').out
			return 1
	if len(cfg['meta_order']) > 0:
		for m in cfg['meta_order']:
			print "initializing out file for meta " + m
			models_out[m] = cfg['out'] + '.' + m
			try:
				bgzfiles[m] = bgzf.BgzfWriter(models_out[m] + '.gz', 'wb')
			except:
				print Process.Error("failed to initialize bgzip format out file " + models_out[m] + '.gz').out
				return 1

	if cfg['cpus'] > 1:
		pool = mp.Pool(cfg['cpus']-1)
		for i in xrange(1,cfg['cpus']):
			return_values[i] = pool.apply_async(process_regions, args=(regions_df,cfg,i,True,))
			print "submitting job on cpu " + str(i) + " of " + str(cfg['cpus'])
		pool.close()
		print "executing job for cpu " + str(cfg['cpus']) + " of " + str(cfg['cpus']) + " via main process"
		main_return = process_regions(regions_df,cfg,cfg['cpus'],True)
		pool.join()

		if 1 in [return_values[i].get() for i in return_values] or main_return == 1:
			print Process.Error("error detected, see log files").out
			return 1

	else:
		main_return = process_regions(regions_df,cfg,1,True)
		if main_return == 1:
			print Process.Error("error detected, see log files").out
			return 1

	for i in xrange(1,cfg['cpus']+1):
		try:
			logfile = open(cfg['out'] + '.cpu' + str(i) + '.log', 'r')
		except:
			print Process.Error("failed to initialize log file " + cfg['out'] + '.cpu' + str(i) + '.log').out
			return 1
		print logfile.read()
		logfile.close()
		os.remove(cfg['out'] + '.cpu' + str(i) + '.log')

	for m in cfg['model_order']:
		written = False
		for i in xrange(1,cfg['cpus']+1):
			out_model_cpu = '/'.join(cfg['out'].split('/')[0:-1]) + '/' + cfg['out'].split('/')[-1] + '.cpu' + str(i) + '.' + m + '.pkl'
			pkl = open(out_model_cpu,"rb")
			results_final,metadata,results_header,tbx_start,tbx_end = pickle.load(pkl)
			if not written:
				bgzfiles[m].write(metadata)
				bgzfiles[m].write("\t".join(results_header) + '\n')
				written = True
			if results_final.shape[0] > 0:
				results_final.replace({'None': 'NA'}).to_csv(bgzfiles[m], index=False, sep='\t', header=False, na_rep='NA', float_format='%.5g', columns = results_header, append=True)
			pkl.close()
			os.remove(out_model_cpu)

		bgzfiles[m].close()

		print "indexing out file for model " + m if m != '___no_tag___' else "indexing out file"
		try:
			pysam.tabix_index(models_out[m] + '.gz',seq_col=0,start_col=tbx_start,end_col=tbx_end,force=True)
		except:
			print Process.Error('failed to generate index for file ' + models_out[m] + '.gz').out
			return 1

	if len(cfg['meta_order']) > 0:
		for m in cfg['meta_order']:
			written = False
			for i in xrange(1,cfg['cpus']+1):
				out_model_meta = '/'.join(cfg['out'].split('/')[0:-1]) + '/' + cfg['out'].split('/')[-1] + '.cpu' + str(i) + '.' + m + '.pkl'
				pkl = open(out_model_meta,"rb")
				results_final_meta,metadata,results_header,tbx_start,tbx_end = pickle.load(pkl)
				if not written:
					bgzfiles[m].write(metadata)
					bgzfiles[m].write('\t'.join(results_header) + '\n')
					written = True
				if results_final_meta.shape[0] > 0:
					results_final_meta.replace({'None': 'NA'}).to_csv(bgzfiles[m], index=False, sep='\t', header=False, na_rep='NA', float_format='%.5g', columns = results_header, append=True)
				pkl.close()
				os.remove(out_model_meta)

			bgzfiles[m].close()

			print "indexing out file for meta " + m
			try:
				pysam.tabix_index(models_out[m] + '.gz',seq_col=0,start_col=tbx_start,end_col=tbx_end,force=True)
			except:
				print Process.Error('failed to generate index for file ' + models_out[m] + '.gz').out
				return 1

	print "process complete"
	return 0
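
RunSnvgroup fans the region list out over cfg['cpus'] - 1 pool workers plus the main process, then joins the pool and checks every return code before merging the per-cpu pickles. Reduced to a toy worker, that fan-out pattern looks roughly like this:

# Sketch of the fan-out pattern above: N-1 pool workers plus the main process
# each handle one chunk; return codes are checked after join().
import multiprocessing as mp

def process_chunk(chunk_id):
    # stand-in for process_regions(); return 0 on success, 1 on error
    return 0

def run(cpus):
    if cpus <= 1:
        return process_chunk(1)
    return_values = {}
    pool = mp.Pool(cpus - 1)
    for i in range(1, cpus):
        return_values[i] = pool.apply_async(process_chunk, args=(i,))
    pool.close()
    main_return = process_chunk(cpus)      # the main process takes the last chunk
    pool.join()
    if 1 in [return_values[i].get() for i in return_values] or main_return == 1:
        return 1
    return 0

if __name__ == '__main__':
    print(run(4))   # 0 when every chunk succeeded
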
Example #56
0
##################################################################################

import Database as DB
import Parse as Parse
import csv

# Parse data from csv file

filename = 'Books/award_categories.csv'
f = open(filename, 'rU')
f.seek(0)

fields = ['id', 'name', 'type_id', 'order', 'note_id']
reader = csv.DictReader(f, dialect='excel-tab', fieldnames=fields)

data = []
for row in reader:

    order = Parse.nullize(row['order'])
    note_id = Parse.nullize(row['note_id'])

    data.append( (row['id'], row['name'], row['type_id'], order, note_id) )


# Insert data into Database

# db = DB.Database('db4free.net','group8','toto123', 'cs322')
#
# sql = 'INSERT INTO Awards (id, title, date, type_id, category_id, note_id) VALUES (%s, %s, %s, %s, %s, %s);'
# db.insertMany(sql, to_db)
Example #57
0
########################################################################
### Parse the titles.csv file, and import data to the MySQL database ###
########################################################################

import Database as DB
import Parse as Parse
import csv

# Parse data from csv file

filename = '../CSV/titles_rem.csv'
f = open(filename, 'rU')
f.seek(0)

fields = ['id', 'title', 'translator', 'synopsis', 'note_id', 'series_id', 'series_nb', 'story_length', 'story_type', 'parent', 'language_id', 'title_graphic']
reader = csv.DictReader(f, dialect='excel-tab', fieldnames=fields)

data = []
for row in reader:

    title_graphic = Parse.booleanize2(row['title_graphic'])
    data.append(
		(row['id'], row['title'], row['translator'], row['synopsis'], 
		 row['note_id'], row['series_id'], row['series_nb'], 
		 row['story_length'], row['story_type'], row['parent'], 
		 row['language_id'], title_graphic)
	       ) 

Parse.writeRows(data, 'titles')
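
Parse.writeRows presumably bulk-inserts the row tuples built above into the named table; its implementation is not part of these examples, and the commented-out Database calls suggest the real target is MySQL. As a rough, hypothetical equivalent using only the standard library, an executemany-based helper could look like this:

# Hypothetical stand-in for Parse.writeRows(data, table) using sqlite3;
# the real helper is not shown and presumably talks to MySQL instead.
import sqlite3

def write_rows(rows, table, columns):
    con = sqlite3.connect('books.db')                     # assumed local database file
    placeholders = ','.join('?' * len(columns))
    sql = 'INSERT INTO %s (%s) VALUES (%s)' % (table, ','.join(columns), placeholders)
    with con:                                             # commits on success
        con.executemany(sql, rows)
    con.close()

# write_rows(data, 'titles', fields)   # one tuple per CSV row, as built above
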
Example #58
0
    def codeDisasm(self, offset=0, length=0, verbose=0, xref=False):
        """
		Makes the disassembler output.
		@param offset: start offset in co_code.
		@param length: length of the substring in co_code.
		@param verbose: verbosity of the output (0, 1, 2)
		@param xref: show back references from jumps and such.
		@return: the disassembler output.		
		"""
        cb = self.getAllCodeBlocks(offset, length)
        commands = self.getCommands(offset, length)
        r = ""
        for cmd in commands:
            if xref and cmd.offset in cb.blocks:
                xstring = cb.strkey(cmd.offset)
                if xstring != "":
                    r += "\n> xref " + cb.strkey(cmd.offset) + "\n"
            r += "%.8X     " % cmd.offset
            r += "%.2X " % cmd.opcode
            if cmd.mnemonics is not None:
                r += "- " + cmd.mnemonics + " " * (20 - len(cmd.mnemonics))
                if cmd.argument is not None:
                    if verbose >= 1:
                        r += "%.4X" % cmd.argument
                    if cmd.mnemonics in (
                        "LOAD_CONST",
                        "COMPARE_OP",
                        "LOAD_FAST",
                        "STORE_FAST",
                        "DELETE_FAST",
                        "IMPORT_NAME",
                        "IMPORT_FROM",
                        "STORE_GLOBAL",
                        "DELETE_GLOBAL",
                        "LOAD_GLOBAL",
                        "STORE_ATTR",
                        "DELETE_ATTR",
                        "LOAD_ATTR",
                        "STORE_NAME",
                        "DELETE_NAME",
                        "LOAD_NAME",
                        "LOAD_CLOSURE",
                        "LOAD_DEREF",
                        "STORE_DEREF",
                        "JUMP_FORWARD",
                        "JUMP_IF_TRUE",
                        "JUMP_IF_FALSE",
                        "SETUP_FINALLY",
                        "SETUP_EXCEPT",
                        "SETUP_LOOP",
                        "FOR_ITER",
                        "JUMP_ABSOLUTE",
                    ):
                        if verbose >= 1:
                            r += " = "
                        if cmd.mnemonics == "LOAD_CONST":
                            if self.co.consts.value[cmd.argument].__class__.__name__ == "pyCode":
                                r += self.co.consts.value[cmd.argument].info(verbose)
                            else:
                                # r +=  Parse.shorten(Parse.dropNewLines(self.co.consts.value[cmd.argument].info(verbose)))
                                r += self.co.consts.value[cmd.argument].info(verbose)
                        elif cmd.mnemonics == "COMPARE_OP":
                            r += '"' + Opcodes.cmp_op[cmd.argument] + '"'
                        elif cmd.mnemonics in ("LOAD_FAST", "STORE_FAST", "DELETE_FAST"):
                            r += self.co.varnames.value[cmd.argument].info(verbose)
                        elif cmd.mnemonics in (
                            "IMPORT_NAME",
                            "IMPORT_FROM",
                            "STORE_GLOBAL",
                            "DELETE_GLOBAL",
                            "LOAD_GLOBAL",
                            "STORE_ATTR",
                            "DELETE_ATTR",
                            "LOAD_ATTR",
                            "STORE_NAME",
                            "DELETE_NAME",
                            "LOAD_NAME",
                        ):
                            r += self.co.names.value[cmd.argument].info(verbose)
                        elif cmd.mnemonics in ("LOAD_CLOSURE", "LOAD_DEREF", "STORE_DEREF"):
                            if cmd.argument < len(self.co.cellvars.value):
                                r += self.co.cellvars.value[cmd.argument].info(verbose)
                            else:
                                r += self.co.freevars.value[cmd.argument - len(self.co.cellvars.value)].info(verbose)
                        elif cmd.mnemonics in (
                            "JUMP_FORWARD",
                            "JUMP_IF_TRUE",
                            "JUMP_IF_FALSE",
                            "SETUP_FINALLY",
                            "SETUP_EXCEPT",
                            "SETUP_LOOP",
                            "FOR_ITER",
                        ):
                            r += "-> %.8X" % (cmd.offset + cmd.argument + cmd.length)
                        elif cmd.mnemonics == "JUMP_ABSOLUTE":
                            r += "-> %.8X" % cmd.argument
                    else:
                        if verbose == 0:
                            r += "r%.4X" % cmd.argument
                if verbose >= 2 and len(Opcodes.opcodes[cmd.opcode]) > 2:
                    r += "\n" + Parse.indentText(Parse.narrowText(Opcodes.opcodes[cmd.opcode][2]), 1)
            r += "\n"
        return r
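    # A hypothetical usage sketch (how the surrounding code-object wrapper is
    # constructed is an assumption; only codeDisasm's signature above is taken
    # from this code):
    #
    #   print obj.codeDisasm(verbose=1, xref=True)        # annotated listing with back references
    #   print obj.codeDisasm(offset=0x10, length=0x20)    # disassemble only a slice of co_code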
Example #59
0
def RunTools(args):
	cfg = Parse.generate_tools_cfg(args)
	Parse.print_tools_options(cfg)

	if not cfg['debug']:
		logging.disable(logging.CRITICAL)

	regions_df = pd.read_table(cfg['region_file'], compression='gzip' if cfg['region_file'].split('.')[-1] == 'gz' else None)
	regions_df = regions_df[regions_df['job'] == int(cfg['job'])].reset_index(drop=True)
	return_values = {}
	print ''
	print "initializing out file"
	try:
		bgzfile = bgzf.BgzfWriter(cfg['out'] + '.gz', 'wb')
	except:
		print Process.Error("failed to initialize bgzip format out file " + cfg['out'] + '.gz').out
		return 1

	if cfg['cpus'] > 1:
		pool = mp.Pool(cfg['cpus']-1)
		for i in xrange(1,cfg['cpus']):
			return_values[i] = pool.apply_async(process_regions, args=(regions_df,cfg,i,True,))
			print "submitting job on cpu " + str(i) + " of " + str(cfg['cpus'])
		pool.close()
		print "executing job for cpu " + str(cfg['cpus']) + " of " + str(cfg['cpus']) + " via main process"
		main_return = process_regions(regions_df,cfg,cfg['cpus'],True)
		pool.join()

		if 1 in [return_values[i].get() for i in return_values] or main_return == 1:
			print Process.Error("error detected, see log files").out
			return 1

	else:
		main_return = process_regions(regions_df,cfg,1,True)
		if main_return == 1:
			print Process.Error("error detected, see log files").out
			return 1

	for i in xrange(1,cfg['cpus']+1):
		try:
			logfile = open(cfg['out'] + '.cpu' + str(i) + '.log', 'r')
		except:
			print Process.Error("failed to initialize log file " + cfg['out'] + '.cpu' + str(i) + '.log').out
			return 1
		print logfile.read()
		logfile.close()
		os.remove(cfg['out'] + '.cpu' + str(i) + '.log')

	written = False
	for i in xrange(1,cfg['cpus']+1):
		cpu_regions_df = regions_df[regions_df['cpu'] == i].reset_index()
		for j in xrange(0,len(cpu_regions_df.index)):
			f_temp=glob.glob(cfg['out'] + '.cpu' + str(i) + '.chr' + cpu_regions_df['region'][j].replace(':','bp') + '*.gz')[0]
			try:
				h=pysam.TabixFile(filename=f_temp,parser=pysam.asVCF())
			except:
				print Process.Error("failed to load vcf file " + f_temp)
				return 1
			if not written:
				for row in h.header:
					bgzfile.write(str(row) + '\n')
				written = True
			h_iter = h.fetch(region=str(cpu_regions_df['chr'][j]))
			for row in h_iter:
				bgzfile.write(str(row) + '\n')
			for f in glob.glob(cfg['out'] + '.cpu' + str(i) + '.chr' + cpu_regions_df['region'][j].replace(':','bp') + '.*'):
				os.remove(f)

	bgzfile.close()

	print "indexing out file"
	try:
		pysam.tabix_index(cfg['out'] + '.gz',preset="vcf",force=True)
	except:
		print Process.Error('failed to generate index').out
		return 1

	print "process complete"
	return 0
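# A minimal runnable sketch of the bgzip-then-tabix pattern used above, assuming the
# merged output is VCF-formatted text (the file name and records are placeholders):
from Bio import bgzf
import pysam

out = bgzf.BgzfWriter('demo_merged.vcf.gz', 'wb')
out.write('##fileformat=VCFv4.1\n')
out.write('#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n')
out.write('1\t12345\t.\tA\tG\t.\tPASS\t.\n')
out.close()
pysam.tabix_index('demo_merged.vcf.gz', preset="vcf", force=True)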
Example #60
0
def evall(C6, etree):
    """evaluates etree into PE format,  using  store and heap in C6

       params:  C6;  etree in usual form, defined in Parse.py
       returns : resulting  PE-value.  If etree is malformed or
         evall gets lost, it returns {}
    """
    ans = {} 
    # case on structure of etree:
    op = etree[0]
    if op == "var" :  # etree is  ["var", vname],  where  vname is a string(!)
        store = C6["store"]
        vname = etree[1]
        if vname in store :
            ans = store[vname] 
        else :  # invent a dummy name for the var and return it...
            #newvalue = make(makeSym())
            #store[vname] = newvalue
            #ans = newvalue # {(_cn,):1, ():0}  # is a free var
            if verbose :
                print "WARNING: evall couldn't find var " + vname + " in the store"
    elif op == "int" :  # etree is ["int", "n"]
        ans = make(int(etree[1]))  
    elif op == "readInt" :
        ans = make(makeSym())   # it's an unknown, new input int
    elif op == "index": # ["index", ["var", v], etree]
        arrayloc = peToTuple(evall(C6, etree[1]))  # get loc of array in heap
        if arrayloc in C6["heap"] :
            vector = C6["heap"][arrayloc][1]
            index = peToTuple(evall(C6, etree[2]))  # PE val to tuple rep.
            if index in vector :   # lookup  listname[index]
                ans = vector[index]
            else :
                # can't find index in  vector,  so try to prove index
                # equal to an existing key:
                listkeys = vector.keys()
                indexpe = tupleToPe(index)
                alias = {}
                for key in listkeys :
                    keype = tupleToPe(key)
                    found = proveRELATION(C6["rels"], ["==", keype, indexpe])
                    if found :
                        alias = key
                        break
                if alias != {} :
                    ans = vector[alias]
                else :
                    if verbose :
                        print "WARNING: evall could not resolve " + etree[1][1] +  str(etree[2]) + " in the store.  Will fake it."
                    newvalue = make(makeSym())
                    vector[index] = newvalue
                    ans = newvalue
        else :
            error("scalar or unknown var " + Parse.tostringExpr(etree[1]) + " cannot be indexed")
    elif op == "len" :   # etree is  ["len", vtree]
        arrayloc = peToTuple(evall(C6, etree[1]))  # get loc of array in heap
        if arrayloc in C6["heap"] :
            ans = C6["heap"][arrayloc][0]
        else :
            if verbose :
                print "WARNING: evall couldn't find array " + etree[1][1] + " in the store"
    elif op == "list" : # etree is ["list" etreelist ] --- const array
        newvector = {}
        elems = [ evall(C6, e) for e in etree[1] ]
        for i in range(len(elems)) :
            newvector[peToTuple(make(i))] = elems[i]
        newloc = make(makeSym())  # for now, we treat locns  like ints
        C6["heap"][peToTuple(newloc)] = (make(len(elems)), newvector)
        ans = newloc # built a new array

    elif op == "call" :  # etree is ["call", v, etreelist ]
        # makes a ``skolem constant'' out of the call --- does NOT evall call
        fname = etree[1]
        args = [ peToTuple(evall(C6, e)) for e in etree[2] ]
        # build key, (fname, arg0, arg1, ..., argn), and make a PE value for it:
        ans = make( (fname,) + tuple(args) )
    
    elif op == "+" :  #  etree is  {"+", e1, e2]
        pe1 = evall(C6, etree[1])
        pe2 = evall(C6, etree[2])
        ans = add(pe1, pe2)   # places sum of pe1 and pe2 into ans
        """ SORRY-- can no longer do list addition/append this way:
        elif isinstance(pe1, tuple) and isinstance(pe2, tuple) : # both lists
            len1 = pe1[0]  # length of first list
            anslen = add(len1, pe2[0])  # length of combined lists
            ansmap = {}
            for k in pe1[1].keys() :
                ansmap[k] = pe1[1][k]  # copying items in pe1 list into ans
            for k in pe2[1].keys() :
                newkey = peToTuple(add(len1, tupleToPe(k)))
                ansmap[newkey] = pe2[1][k]
            ans = (anslen, ansmap)
        else :
            error("adding arrays and ints")
        """

    elif op == "-" :  # etree is  ["-", e1, e2]
        pe1 = evall(C6, etree[1])
        pe2 = evall(C6, etree[2])
        ans = subtract(pe1, pe2)
    elif op == "*" :  # etree is  ["*", e1, e2]
        pe1 = evall(C6, etree[1])
        pe2 = evall(C6, etree[2])
        ans = mult(pe1, pe2)
    else : # the expression is not an int-typed expr, and we are lost
        error("evall cannot evaluate this non-int expr: " + str(etree))
    return ans
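
# A hypothetical call sketch (the exact shapes of C6, make(), and the PE encoding
# are assumptions inferred from the comments above, not from a verified run):
#
#   C6 = {"store": {"x": make(2)}, "heap": {}, "rels": []}
#   evall(C6, ["+", ["var", "x"], ["int", "3"]])   # should yield the PE value for 5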