Example 1
    def download(self):
        import zipfile
        import urllib
        dfile = config.path("..", "data", self.datafile.upper(), self.datafile.upper() + ".zip")
        urllib.urlretrieve(config.download % self.datafile.lower(), dfile)
        with zipfile.ZipFile(dfile) as zf:
            zf.extractall(config.path("..", "data", self.datafile.upper(), "data"))
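This helper targets Python 2 (urllib.urlretrieve from the old stdlib). For reference, a rough Python 3 sketch of the same download-and-extract step, written as a standalone function so it does not depend on the project's config module:

import zipfile
import urllib.request

def download_and_extract(url, zip_path, extract_dir):
    # Standalone Python 3 equivalent of the download() helper above:
    # fetch the archive and unpack it into the chosen data directory.
    urllib.request.urlretrieve(url, zip_path)
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall(extract_dir)

# Usage mirroring the snippet (config.path/config.download as defined by the project):
#   download_and_extract(config.download % datafile.lower(),
#                        config.path("..", "data", datafile.upper(), datafile.upper() + ".zip"),
#                        config.path("..", "data", datafile.upper(), "data"))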
Example 2
def visualize_with_json(args, vis_coords, sequences, strand, result_coords, fasta_sequence, targets):
    """
    Writes the run.info summary and, when requested, the BED and GenBank outputs
    for the computed cut coordinates.
    """
    cutcoords = get_coordinates_for_json_visualization(args, vis_coords, sequences, strand, result_coords)

    with open(f"{args.output_dir}/run.info", 'w') as info:
        info.write("%s\t%s\t%s\t%s\t%s\n" % ("".join(args.targets), args.genome, args.program_mode,
                                             args.unique_method_cong, args.guide_size))

    if args.bed:
        print_bed(args.program_mode, vis_coords, cutcoords, f"{args.output_dir}/results.bed",
                  vis_coords[0]["name"] if args.fasta else args.targets)

    if args.gen_bank:
        if args.fasta:
            seq = fasta_sequence
            chrom = vis_coords[0]["name"]
            start = 0
            finish = len(fasta_sequence)
        else:
            # targets min-max (with introns)
            regions = targets
            chrom = regions[0][0:regions[0].rfind(':')]
            start = []
            finish = []
            targets = []
            for region in regions:
                start_r = int(region[region.rfind(':') + 1:region.rfind('-')])
                start_r = max(start_r, 0)
                start.append(start_r)
                finish_r = int(region[region.rfind('-') + 1:])
                finish.append(finish_r)
                targets.append([chrom, start_r, finish_r])
            start = min(start)
            finish = max(finish)

            prog = subprocess.Popen("%s -seq=%s -start=%d -end=%d %s/%s.2bit stdout 2> %s/twoBitToFa.err" % (
                config.path("TWOBITTOFA"), chrom, start, finish,
                config.path("TWOBIT_INDEX_DIR") if not config.isoforms else config.path("ISOFORMS_INDEX_DIR"),
                args.genome, args.output_dir), stdout=subprocess.PIPE, shell=True)
            output = prog.communicate()
            if prog.returncode != 0:
                sys.stderr.write("Running twoBitToFa failed when creating GenBank file\n")
                sys.exit(EXIT['TWOBITTOFA_ERROR'])

            output = output[0].decode()
            output = output.split("\n")
            seq = ''.join(output[1:]).upper()

        print_genbank(args.program_mode, chrom if args.fasta else args.targets, seq,
                      [] if args.fasta else targets, cutcoords, chrom, start, finish,
                      strand, f"{args.output_dir}/results.gb", "CHOPCHOP results")
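The twoBitToFa step above goes through the shell with a single formatted string. Below is a sketch of the same invocation using subprocess.run with an argument list instead of a shell command; the helper name and the plain exit code are invented here, and the caller would pass in the values the snippet takes from config and args:

import subprocess
import sys

def run_two_bit_to_fa(two_bit_to_fa, two_bit_index_dir, genome, chrom, start, finish, output_dir):
    """Fetch chrom:start-finish from a .2bit index and return the upper-cased sequence."""
    cmd = [
        two_bit_to_fa,
        "-seq=%s" % chrom,
        "-start=%d" % start,
        "-end=%d" % finish,
        "%s/%s.2bit" % (two_bit_index_dir, genome),
        "stdout",
    ]
    with open("%s/twoBitToFa.err" % output_dir, "w") as err:
        prog = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=err)
    if prog.returncode != 0:
        sys.stderr.write("Running twoBitToFa failed when creating GenBank file\n")
        sys.exit(1)  # the snippet uses EXIT['TWOBITTOFA_ERROR'] instead
    # Drop the FASTA header line and join the sequence lines.
    return "".join(prog.stdout.decode().split("\n")[1:]).upper()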
Example 3
    def __init__(self, datafile=""):
        self.datafile = datafile
        self.features = {}
        self.tags = []
        self.costs = []
        self.categorical = []
        self.continuous = []
        

        self.parseCodebook()
        self.varTables = config.get(config.path("..","data",datafile,"data","varTables.p"), gf.read_tables, datafile = datafile)
        self.titleMap = config.get(config.path("..","data",datafile,"data","table_map.p"), self.writeTables)
        self.filterIDS()
        self.writeDataCSV()
        self.getCostFeatures()
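Throughout these examples, config.get(pickle_path, builder, **kwargs) is used as a build-or-load cache keyed on a pickle file. Its implementation is not shown here; purely as an assumption, a minimal version of such a helper could look like this:

import os
import pickle

def get(cache_path, builder, **kwargs):
    # Hypothetical cache helper: load the pickle if it exists, otherwise
    # build the object, pickle it for next time, and return it.
    if os.path.exists(cache_path):
        with open(cache_path, "rb") as f:
            return pickle.load(f)
    obj = builder(**kwargs)
    with open(cache_path, "wb") as f:
        pickle.dump(obj, f)
    return obj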
Example 4
    def writeDataCSV(self):
        """
        Convert the fixed-width .dat file to CSV, downloading the raw data first if needed
        """
        def download(self):
            import zipfile
            import urllib
            dfile = config.path("..","data",self.datafile.upper(), self.datafile.upper() + ".zip")
            urllib.urlretrieve(config.download % self.datafile.lower(), dfile)
            with zipfile.ZipFile(dfile) as zf:
                zf.extractall(config.path("..","data",self.datafile.upper(),"data"))

        path = config.path("..","data",self.datafile, "data", self.datafile.lower())

        if not config.os.path.exists(path + ".dat"):
            download(self)
        if config.os.path.exists(path + ".csv"):
            return

        indices = [self.features[tag][0] for tag in self.tags]
        printFormat = "".join(["%s" * (high - low) + "," for low,high in zip(indices, indices[1:])])
        
        # Categorical Mapper Path
        with open(path+".csv", 'wb') as g:
            with open(path + ".dat", 'rb') as f:
                format_ = printFormat + "%s" * (len(f.readline().strip()) - indices[-1] + 1)
                for line in f:
                    values = (format_ % tuple(line.strip())).split(",")
                    for i, value in enumerate(values):
                        # Normalise numeric fields; non-numeric values are kept as-is.
                        try:
                            values[i] = str(float(value))
                        except ValueError:
                            values[i] = str(value)
                    g.write(",".join(values) + "\n")
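The printFormat trick above splits each fixed-width record by re-inserting every character and adding a comma at each field boundary. The same column-based split can be shown more directly with slicing; the record and offsets below are invented for the illustration:

# Illustrative only: split a fixed-width record at known column offsets.
line = "01234AB9.5"
indices = [0, 5, 7, 10]          # hypothetical field boundaries
fields = [line[lo:hi] for lo, hi in zip(indices, indices[1:])]
print(fields)                    # ['01234', 'AB', '9.5']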
Example 5
def read_tables(datafile):
    """
    From get_features.py
    Parses the HTML as plain text
    Returns dictionary of {titles:variables}
    """
    path = config.path("..","data",datafile,"data", "tables.txt")
    if not config.os.path.exists(path):
        page = download(datafile)
    else:
        with open(path, 'rb') as f:
            page = f.read()
    start = page.find("<a name=\"DVariable\">")
    if start == -1:
        start = page.rfind("Variable-Source Crosswalk</a>")
    page = page[start:]
    end = page.rfind("<a name=\"Appendix")
    soup = Soup(page[:end])
    titles, tables = [], []
    found_tables = soup.find_all("table", summary=re.compile("This table identifies the variable .*"))
    for table in found_tables:
        title = table.caption if table.caption is not None else table.find_previous_sibling("p", {"class": "contentStyle"})
        titles.append(title.text.encode("utf-8"))
        tables.append([var.text.encode("utf-8") for var in table.find_all("th")[3:]])

    if not (len(titles) == len(tables) and titles != [] and [] not in tables):
        return False

    return dict(zip(titles,tables))
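The table extraction above relies on BeautifulSoup's find_all with a regular expression on the summary attribute, falling back to a preceding paragraph when a table has no caption. A self-contained toy version of that pattern (the HTML is made up):

import re
from bs4 import BeautifulSoup as Soup

# Toy HTML standing in for the downloaded documentation page.
html = """
<p class="contentStyle">Demographics</p>
<table summary="This table identifies the variable source by round.">
  <tr><th>a</th><th>b</th><th>c</th><th>AGE</th><th>SEX</th></tr>
</table>
"""
soup = Soup(html, "html.parser")
for table in soup.find_all("table", summary=re.compile("This table identifies the variable .*")):
    title = table.caption if table.caption is not None else table.find_previous_sibling("p", {"class": "contentStyle"})
    print(title.text, [th.text for th in table.find_all("th")[3:]])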
Example 6
def download(datafile):
    """
    From get_features.py
    Downloads the documentation as a text file from HTML
    """
    try:
        page = urllib2.urlopen(config.tables.format(datafile.lower())).read()
    except:
        print "HTTP Failed: Check your connection to the internet or check the name of the datafile"
        sys.exit()
    with open(config.path("..","data",datafile,"data", "tables.txt"), 'wb') as f:
        f.write(page)
    return page
Example 7
def writeFeatureImportance(model, trainFeature, datafile):
    """
    Formats and prints the importance of each feature
    author: Jazmin
    TODO: feature names are currently looked up in an inefficient, hard-coded way; clean this up
    """
    importances = zip(range(trainFeature.shape[1]), model.feature_importances_)
    importances.sort(key=itemgetter(1))
    with open(config.path("..", "data", datafile, "featureImportance.py"), "wb") as f:
        f.write("importance = ")
        for featureIndex, importance in importances[::-1]:
            variable = config.feature_dict["H147"][featureIndex]
            f.write(" " + str(variable) + " " + str(importance) + " " + str(dc.getData(datafile)[0][variable][0]) + " \n")
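model.feature_importances_ above comes from a fitted tree ensemble. A minimal, self-contained sketch with scikit-learn toy data (not the project's pipeline) of producing and sorting importances in the same way:

from operator import itemgetter

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Toy data and model; the real code fits on the project's feature matrix.
X, y = make_classification(n_samples=200, n_features=5, random_state=0)
model = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)

# Pair each feature index with its importance and sort, most important first.
importances = sorted(enumerate(model.feature_importances_), key=itemgetter(1), reverse=True)
for feature_index, importance in importances:
    print(feature_index, importance)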
Example 8
def set_from_config(kwargs):
    if kwargs["config"] is None:
        config_path = config.path()
    else:
        config_path = kwargs["config"]

    kwargs["config_path"] = config_path

    kwargs["config"] = config.read(kwargs["config_path"])

    keys = {"paths": [("prefs", "prefs_root", True),
                      ("run_info", "run_info", True)],
            "web-platform-tests": [("remote_url", "remote_url", False),
                                   ("branch", "branch", False),
                                   ("sync_path", "sync_path", True)],
            "SSL": [("openssl_binary", "openssl_binary", True),
                    ("certutil_binary", "certutil_binary", True),
                    ("ca_cert_path", "ca_cert_path", True),
                    ("host_cert_path", "host_cert_path", True),
                    ("host_key_path", "host_key_path", True)]}

    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            if kw_value in kwargs and kwargs[kw_value] is None:
                if not is_path:
                    new_value = kwargs["config"].get(section, config.ConfigDict({})).get(config_value)
                else:
                    new_value = kwargs["config"].get(section, config.ConfigDict({})).get_path(config_value)
                kwargs[kw_value] = new_value

    kwargs["test_paths"] = get_test_paths(kwargs["config"])

    if kwargs["tests_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["tests_path"] = kwargs["tests_root"]

    if kwargs["metadata_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["metadata_path"] = kwargs["metadata_root"]

    if kwargs.get("manifest_path"):
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["manifest_path"] = kwargs["manifest_path"]

    kwargs["suite_name"] = kwargs["config"].get("web-platform-tests", {}).get("name", "web-platform-tests")


    check_paths(kwargs)
Example 9
def set_from_config(kwargs):
    if kwargs["config"] is None:
        config_path = config.path()
    else:
        config_path = kwargs["config"]

    kwargs["config_path"] = config_path
    kwargs["config"] = config.read(kwargs["config_path"])
    kwargs["test_paths"] = OrderedDict()

    keys = {
        "paths": [("serve", "serve_root", True), ("prefs", "prefs_root", True), ("run_info", "run_info", True)],
        "web-platform-tests": [
            ("remote_url", "remote_url", False),
            ("branch", "branch", False),
            ("sync_path", "sync_path", True),
        ],
    }

    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            if kw_value in kwargs and kwargs[kw_value] is None:
                if not is_path:
                    new_value = kwargs["config"].get(section, {}).get(config_value)
                else:
                    new_value = kwargs["config"].get(section, {}).get_path(config_value)
                kwargs[kw_value] = new_value

    # Set up test_paths

    for section in kwargs["config"].iterkeys():
        if section.startswith("manifest:"):
            manifest_opts = kwargs["config"].get(section)
            url_base = manifest_opts.get("url_base", "/")
            kwargs["test_paths"][url_base] = {
                "tests_path": manifest_opts.get_path("tests"),
                "metadata_path": manifest_opts.get_path("metadata"),
            }

    if kwargs["tests_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["tests_path"] = kwargs["tests_root"]

    if kwargs["metadata_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["metadata_path"] = kwargs["metadata_root"]
Example 10
def writeFeatures(costFeature, datafile, importance , tags):
    """
    Writes feature importances to file in order of importance
    Saves to pickle file for use in future modelling

    Takes in costFeature index of d.tags
    Takes in the model

    Returns the costFeature, Sorted list of feature indices based on importance
    """
    sortedFeatures = sorted(zip(tags, list(importance)), key=lambda x: -x[1])
    with open(config.path("..", "data", datafile, "features", "importances", "%s.txt" % costFeature), 'wb') as f:
        for feature, importance in sortedFeatures:
            write = "%s#%f\n" % (feature, importance)
            f.write(write.replace("#", (24 - len(write)) * " "))
    return sortedFeatures
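The '#' placeholder combined with (24 - len(write)) spaces is a manual way of padding each line so the importance values line up in a fixed-width column. A small illustrative alternative using a printf-style field width (the width here is a guess):

# Illustrative only: left-align the feature name so the importance column
# lines up, instead of splicing spaces in by hand.
sorted_features = [("AGE31X", 0.154321), ("SEX", 0.012345)]
for feature, importance in sorted_features:
    print("%-16s%f" % (feature, importance))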
Example 11
    def writeTables(self):
        """
        In data.py
        Writing tables to file for user to reference
        """
        path = config.path("..", "data", self.datafile, "data", "variables.txt")
        if config.os.path.exists(path):
            return
        with open(path, 'wb') as f:
            f.write("Variables found for data set %s\n" % self.datafile)
            i = 0
            varMap = {}
            for title, tables in self.varTables.items():
                f.write("\n\n=== %s :: %s ===\n" % (string.letters[i].upper(), title))
                f.write("\n".join(["\t%s%s%s" % (tag, (18 - len(tag)) * " ", self.features[tag][1]) for tag in tables if tag in self.features]))
                varMap[string.letters[i].upper()] = (title, [tag for tag in tables if tag in self.features])
                i += 1
        return varMap
Example 12
def set_from_config(kwargs):
    if kwargs["config"] is None:
        kwargs["config"] = config.path()

    kwargs["config"] = config.read(kwargs["config"])

    keys = {"paths": [("tests", "tests_root", True),
                      ("metadata", "metadata_root", True)],
            "web-platform-tests": [("remote_url", "remote_url", False),
                                   ("branch", "branch", False),
                                   ("sync_path", "sync_path", True)]}

    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            if kw_value in kwargs and kwargs[kw_value] is None:
                if not is_path:
                    new_value = kwargs["config"].get(section, {}).get(config_value, None)
                else:
                    new_value = kwargs["config"].get(section, {}).get_path(config_value)
                kwargs[kw_value] = new_value
Example 13
def set_from_config(kwargs):
    if kwargs["config"] is None:
        config_path = config.path()
    else:
        config_path = kwargs["config"]

    kwargs["config_path"] = config_path
    kwargs["config"] = config.read(kwargs["config_path"])

    keys = {"paths": [("serve", "serve_root", True),
                      ("prefs", "prefs_root", True),
                      ("run_info", "run_info", True)],
            "web-platform-tests": [("remote_url", "remote_url", False),
                                   ("branch", "branch", False),
                                   ("sync_path", "sync_path", True)]}

    for section, values in keys.iteritems():
        for config_value, kw_value, is_path in values:
            if kw_value in kwargs and kwargs[kw_value] is None:
                if not is_path:
                    new_value = kwargs["config"].get(section, {}).get(config_value)
                else:
                    new_value = kwargs["config"].get(section, {}).get_path(config_value)
                kwargs[kw_value] = new_value

    kwargs["test_paths"] = get_test_paths(kwargs["config"])

    if kwargs["tests_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["tests_path"] = kwargs["tests_root"]

    if kwargs["metadata_root"]:
        if "/" not in kwargs["test_paths"]:
            kwargs["test_paths"]["/"] = {}
        kwargs["test_paths"]["/"]["metadata_path"] = kwargs["metadata_root"]
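The set_from_config variants above pull values out of sections such as [paths] and [web-platform-tests]. As an assumption about the underlying file format, an ini file matching those keys, read here with the standard configparser module rather than the project's config wrapper, might look like this:

import configparser

# Hypothetical ini content matching the section/key names used above;
# the real config.read wrapper may add path resolution on top of this.
CONFIG_TEXT = """
[paths]
prefs = prefs
run_info = run_info

[web-platform-tests]
remote_url = https://github.com/web-platform-tests/wpt
branch = master
sync_path = sync
"""

parser = configparser.ConfigParser()
parser.read_string(CONFIG_TEXT)
print(parser.get("web-platform-tests", "branch"))      # master
print(parser.get("paths", "prefs", fallback=None))     # prefs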
Example 14
                        help = "number of trees to use for decision tree algorithms")
    parse.add_option("-l", "--lookup", dest = "lookup", default = "",
                        help = "looks up specific variable and prints descriptions and values")
    parse.add_option("-u", "--use", dest = "model", default = "",
                        help = "use an extracted model to predict costs")
    parse.add_option("-e", "--extract", dest = "extract", default = "",
                        help = "target cost to create a model for future use")

    (options, args) = parse.parse_args()

    output = sys.stdout

    if options.tables != "none":
        print "Looking up tables, please wait..."
        # sys.stdout = open("runOutput.txt", 'wb')
        d = config.get(config.path("..","data",options.datafile,"data","dHandler.p"), dc.Data, datafile = options.datafile)     
        print "Cost Features:\n%s" % "\n".join([d.tags[tag] for tag in d.costs])
        # sys.stdout = output
        variable_lookup(d, options.tables) 
        sys.exit()

    if options.lookup != "":
        import feature_lookup as fl
        d = config.get(config.path("..","data",options.datafile,"data","dHandler.p"), dc.Data, datafile = options.datafile)     
        print "Looking up feature, please wait..."
        # sys.stdout = open("runOutput.txt", 'wb')
        # sys.stdout = output
        details = fl.getDetails(options.datafile, options.lookup)
        print "======================================="
        print details["Description"]
        print
        print details["Values"]
Example 15
#!/usr/bin/env python

"""

Jappix Me - Your public profile, anywhere
Pending profile checker

License: AGPL
Author: Valerian Saliou

"""

import xmpp, os, shutil, time, phpserialize, config

BASE_DIR = config.path()


###############
### MESSAGE ###
###############


def message_app_send(session, user, body, app_data):
    url = xmpp.Node("url", payload=[app_data["url"]])
    action = xmpp.Node(
        "action", attrs={"type": app_data["type"], "job": app_data["job"], "success": app_data["success"]}
    )

    data = xmpp.Node("data", attrs={"xmlns": "jappix:app:" + app_data["id"]}, payload=[action, url])
    name = xmpp.Node("name", attrs={"id": app_data["id"]}, payload=[app_data["name"]])
Example 16
from redis import Redis
from flask import Flask, session, redirect, url_for, render_template, request

from pushchat.gravatar import get_gravatar
from pushchat.validators import validate_email
from pushchat import publisher

import config

app = Flask(__name__, template_folder=config.path('templates'),
            static_folder=config.path('static'))
redis = None


def get_post(post_id):
    """Given a post id, return a dictionary with the body and avatar url."""
    post = redis.get('post:%s' % (post_id,))
    if post:
        user, post = post.split('|', 1)
        post = unicode(post, errors='ignore')
        return dict(body=post, user=get_gravatar(user, size=16))


def set_post(post):
    """Store a post and push it onto the global timeline."""
    if 'email' not in session:
        return redirect(url_for('.login'))
    post_id = redis.incr('last-post-id')
    post = post[:140]  # silently truncate!
    redis.set('post:%d' % (post_id,), '%s|%s' % (session['email'], post))
    redis.lpush('global:timeline', post_id)
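The post storage above uses three Redis keys: an auto-incremented last-post-id counter, post:<id> strings holding 'email|body', and a global:timeline list of post ids. A minimal round trip against a local Redis server (assumed to be running on the default port), outside of Flask:

from redis import Redis

r = Redis()  # assumes a local Redis server on the default port

# Store one post the same way set_post does.
post_id = r.incr('last-post-id')
r.set('post:%d' % post_id, 'alice@example.com|hello world')
r.lpush('global:timeline', post_id)

# Read it back the same way get_post does.
raw = r.get('post:%d' % post_id).decode()
user, body = raw.split('|', 1)
print(user, body)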
Example 17
    def __str__(self):
        with open(config.path("..", "data", self.datafile, "data", "variables.txt"), 'rb') as f:
            return f.read()
Example 18
    def parseCodebook(self):
        """
        Given the datafile name, returns the codebook needed
        author: chris
        """
        import urllib2, unicodedata
        def download(path):
            page = urllib2.urlopen(config.codebook.format(self.datafile.lower())).read()
            with open(path, 'wb') as f:
                f.write(page)
            return page
        path = config.path("..","data",self.datafile,"data","codebook.txt")

        if not config.os.path.exists(path):
            page = download(path)
        else:
            with open(path, 'rb') as f:
                page = f.read()

        # The codebook is split into four statement blocks; slice each out by its marker.
        _input  = page.find("* INPUT STATEMENTS;")
        _format = page.find("* FORMAT STATEMENTS;")
        _label  = page.find("* LABEL STATEMENTS;")
        _value  = page.find("* VALUE STATEMENTS;")

        indices = page[_input:_format]   # column positions and variable tags
        mapping = page[_format:_label]   # format name -> variable tag
        desc = page[_label:_value]       # variable descriptions
        values = page[_value:]           # value labels for each variable

        for line in indices.split("\n")[3:]:
            if line.strip() == ";":
                break
            split = line.split()
            self.tags.append(split[-2].strip())
            self.features[split[-2].strip()] = [int(split[-3].strip()[1:])]
        for line in desc.split("\n")[1:]:
            if line.strip() == ";":
                break
            split = line.split("=")
            self.features[split[0].strip().split()[-1]].append(split[1].strip())
        
        mapper = {}
        for line in mapping.split("\n")[1:]:
            if line.strip() == ";":
                break
            split = line.split()
            mapper[split[-1].strip()[:-1]] = split[-2].strip()


        tag = ""
        value_list = []
        count = 0
        cost_tags = [self.tags[cost] for cost in self.costs]
        for line in values.split("\n")[1:]:
            if line.strip() == "":
                continue
            if "VALUE" in line[:6]:
                tag = mapper[line.split()[1].strip()]
                continue
            if "=" in line:
                split = line.split("=")
                value_list.append((split[0].strip(), split[1].strip()))
            if ";" == line.strip()[0]:
                # A ';' line closes the value list for the current variable: if its last
                # label looks like a numeric or dollar range, treat it as continuous.
                check = value_list[-1][-1]
                if "-" in check and check.split("-")[-1].strip()[0] in ["$", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]:
                    self.continuous.append(self.tags.index(tag))
                else:
                    self.categorical.append(self.tags.index(tag))
                self.features[tag].append(value_list)
                value_list = []
                continue
        return