def setUpApplication(self, app):
        fileBodies = []
        totalValues = OrderedDict()
        rootDir = app.getPreviousWriteDirInfo(self.tmpInfo)
        if not os.path.isdir(rootDir):
            sys.stderr.write("No temporary directory found at " + rootDir + " - not collecting batch reports.\n")
            return
        dirlist = os.listdir(rootDir)
        dirlist.sort()
        compulsoryVersions = set(app.getBatchConfigValue("batch_collect_compulsory_version"))
        versionsFound = set()
        for dir in dirlist:
            fullDir = os.path.join(rootDir, dir)
            if os.path.isdir(fullDir) and self.matchesApp(dir, app):
                currBodies, currVersions = self.parseDirectory(fullDir, app, totalValues)
                fileBodies += currBodies
                versionsFound.update(currVersions)
        if len(fileBodies) == 0:
            self.diag.info("No information found in " + rootDir)
            return

        missingVersions = compulsoryVersions.difference(versionsFound)

        mailTitle = self.getTitle(app, totalValues)
        mailContents = self.mailSender.createMailHeaderForSend(self.runId, mailTitle, app)
        mailContents += self.getBody(fileBodies, missingVersions)
        allSuccess = len(totalValues.keys()) == 1 and totalValues.keys()[0] == "succeeded"
        self.mailSender.sendOrStoreMail(app, mailContents, isAllSuccess=allSuccess)
Example #2
def printClusters(tf_idf_values, dom_values, ind, centroids):
    xy_dict = dict(zip(tf_idf_values, dom_values))
    xy_dict_ordered = OrderedDict(
        sorted(xy_dict.items(), key=lambda item: item[0]))
    colors = ['r', 'y', 'm', 'c', 'b', 'g', 'r', 'y', 'm', 'c']
    ax.hold(True)
    ax.set_ylim(0, 1.1)
    ax.set_xlim(0, 1)
    #plt.grid(True, linestyle='-', color='0.75')
    ax.scatter(xy_dict_ordered.keys(),
               xy_dict_ordered.values(),
               s=30,
               c=colors[ind],
               marker='o',
               alpha=0.75,
               linewidths=.1)
    ax.plot(xy_dict_ordered.keys(),
            xy_dict_ordered.values(),
            linestyle='-',
            c=colors[ind],
            alpha=.40)
    # centroids
    ax.scatter(centroids[ind],
               1,
               marker='o',
               s=300,
               linewidths=1,
               c='w',
               alpha=0.60)
    ax.scatter(centroids[ind],
               1,
               marker='x',
               s=300,
               linewidths=1,
               c='k',
               alpha=0.60)
    ax.vlines(x=centroids[ind],
              ymin=0,
              ymax=1,
              color='k',
              linestyles='solid',
              alpha=0.40)

    #avg_dom = sum(dom_values)/len(dom_values)
    #min_val = min([tf_idf for index, tf_idf in enumerate(tf_idf_values) if dom_values[index] > .5])
    #max_val = max([tf_idf for index, tf_idf in enumerate(tf_idf_values) if dom_values[index] > .5])
    #ax.plot([min_val, centroids[ind], max_val], [0, 1, 0], linewidth=0.3, color='black')
    #print min_val, max_val
    canvas.print_figure('arun_plot.pdf', dpi=700)
Example #3
 def test_to_dict(self):
     """
     Note: data files with column headers are tested
     in test_filters.py
     """
     r = pe.Reader(self.testfile)
     result = OrderedDict()
     result.update({"Series_1": [1, 2, 3, 4]})
     result.update({"Series_2": [5, 6, 7, 8, ]})
     result.update({"Series_3": [9, 10, 11, 12]})
     actual = pe.utils.to_dict(r.rows())
     assert actual.keys() == result.keys()
     assert result == actual
     result = {
         "Series_1": 1,
         "Series_2": 2,
         "Series_3": 3,
         "Series_4": 4,
         "Series_5": 5,
         "Series_6": 6,
         "Series_7": 7,
         "Series_8": 8,
         "Series_9": 9,
         "Series_10": 10,
         "Series_11": 11,
         "Series_12": 12
     }
     actual = pe.utils.to_dict(r.enumerate())
     assert result == actual
def split_sets(split, response_sets, survey_cache, split_entities = False):
    result = []

    if split:
        for ds in split:
            result.append((ds, flatten_response_queryset(response_sets.filter(data_series=ds), survey_cache)))
    else:
        split = [None]
        result.append((None, flatten_response_queryset(response_sets, survey_cache)))

    if split_entities:
        result_dict = OrderedDict()
        series = []
        for ds, qs in result:
            series.append(ds)

        for ds, qs in result:
            filter_dict = OrderedDict()
            for rs in qs:
                #Assign all responsesets to the entity that owns them
                filter_dict.setdefault(rs.entity, []).append(rs)

            for entity, data in filter_dict.items():
                if entity not in result_dict.keys():
                    for s in series:
                        # Ensure all dataseries are present in entity output
                        result_dict.setdefault(entity, OrderedDict())[s] = []
                result_dict[entity][ds] = data
        for entity, data in result_dict.items():
            result_dict[entity] = data.items()
        result = result_dict
    return result
class SpecDataBase:
    "A Base class for parsing Spec tables"
    def __init__(self, soup, elem=SpecDataElem):
        self.__elem = elem
        tabs = self.htmlTables(soup)
        self.__tables = OrderedDict()
        for tab in tabs:
            table = self.parseTable(tab)
            if table.name() not in self.__tables:
                self.__tables[table.name()] = table
            else:
                raise Exception("Table " + table.name() + " duplicated!")

    def getElem(self):
        return self.__elem

    def htmlTables(self, soup):
        raise Exception("htmlTables() must be redefined!")

    def getNames(self):
        return self.__tables.keys()

    def getTable(self, name):
        return self.__tables[name]

    def parseTable(self, tab):
        raise Exception("parseTable() must be redefined!")
         

    def numTables(self):
        return(len(self.__tables))

    def toString(self, name):
        return self.__tables[name].toString()
 def test_iterators(self):
     pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]
     shuffle(pairs)
     od = OrderedDict(pairs)
     self.assertEqual(list(od), [t[0] for t in pairs])
     self.assertEqual(list(od.keys()), [t[0] for t in pairs])
     self.assertEqual(list(od.values()), [t[1] for t in pairs])
     self.assertEqual(list(od.items()), pairs)
     self.assertEqual(list(reversed(od)), [t[0] for t in reversed(pairs)])
 def test_iterators(self):
     pairs = [('c', 1), ('b', 2), ('a', 3), ('d', 4), ('e', 5), ('f', 6)]
     shuffle(pairs)
     od = OrderedDict(pairs)
     self.assertEqual(list(od), [t[0] for t in pairs])
     self.assertEqual(list(od.keys()), [t[0] for t in pairs])
     self.assertEqual(list(od.values()), [t[1] for t in pairs])
     self.assertEqual(list(od.items()), pairs)
     self.assertEqual(list(reversed(od)),
                      [t[0] for t in reversed(pairs)])
Example #8
def aminoacids(a):  # Same as what is in aa_handle; TODO: call it from that other file. How?
    from os.path import expanduser
    from ordereddict import OrderedDict

    aa_file = expanduser("~") + '/QB9-git/SuperMarioQB9/resources/aminoacidos'
    result = OrderedDict()
    res = OrderedDict()

    with open(aa_file) as aminoa:
        line = aminoa.readline().replace("\n", '')
        while line != '':
            letra = line[0]
            abr = line[2:5]
            nombre = line[6:]
            result[letra] = (abr, nombre)
            line = aminoa.readline().replace("\n", '')
    for o in range(0, len(result)):
        res[result.keys()[o]] = result[result.keys()[o]][a]
    return res
Example #9
    def process(self):
        input_text = self.artifact.input_data.data()

        if input_text.find("<!-- endsplit -->") > 0:
            body, footer = re.split("<!-- endsplit -->",
                                    input_text,
                                    maxsplit=1)
            sections = re.split("<!-- split \"(.+)\" -->\n", body)
            header = sections[0]

            pages = OrderedDict()
            index_content = None
            for i in range(1, len(sections), 2):
                if sections[i] == 'index':
                    index_content = sections[i + 1]
                else:
                    section_name = sections[i]
                    # TODO proper url/filename escaping
                    section_url = section_name.replace(" ", "-")

                    filename = "%s.html" % section_url
                    filepath = os.path.join(self.artifact.parent_dir(),
                                            filename)
                    pages[section_name] = filename

                    new_page = self.add_doc(filepath,
                                            header + sections[i + 1] + footer)

                    self.artifact.log.debug(
                        "added key %s to artifact %s ; links to file %s" %
                        (filepath, self.artifact.key, new_page.name))

            index_items = []
            for k in sorted(pages.keys()):
                index_items.append("""<li><a href="%s">%s</a></li>""" %
                                   (pages[k], k))

            output = []
            output.append(header)
            if index_content:
                output.append(index_content)

            if self.artifact.args.has_key("split-ul-class"):
                ul = "<ul class=\"%s\">" % self.artifact.args['split-ul-class']
            else:
                ul = "<ul class=\"split\">"

            output.append("%s\n%s\n</ul>" % (ul, "\n".join(index_items)))
            output.append(footer)
        else:
            # No endsplit found, do nothing.
            output = input_text

        self.artifact.output_data.set_data("\n".join(output))
def printClusters(tf_idf_values, dom_values, ind, centroids):
    xy_dict = dict(zip(tf_idf_values, dom_values))
    xy_dict_ordered = OrderedDict(sorted(xy_dict.items(), key=lambda item: item[0]))
    colors = ['r', 'y', 'm', 'c', 'b', 'g', 'r', 'y', 'm', 'c']
    ax.hold(True)
    ax.set_ylim(0, 1.1)
    ax.set_xlim(0, 1)
    #plt.grid(True, linestyle='-', color='0.75')
    ax.scatter(xy_dict_ordered.keys(), xy_dict_ordered.values(), s=30, c=colors[ind], marker='o', alpha=0.75, linewidths=.1)
    ax.plot(xy_dict_ordered.keys(), xy_dict_ordered.values(), linestyle='-', c=colors[ind], alpha=.40)
    # centroids
    ax.scatter(centroids[ind], 1, marker='o', s=300, linewidths=1, c='w', alpha=0.60)
    ax.scatter(centroids[ind], 1, marker='x', s=300, linewidths=1, c='k', alpha=0.60 )
    ax.vlines(x=centroids[ind], ymin=0, ymax=1, color='k', linestyles='solid', alpha=0.40)

    #avg_dom = sum(dom_values)/len(dom_values)
    #min_val = min([tf_idf for index, tf_idf in enumerate(tf_idf_values) if dom_values[index] > .5])
    #max_val = max([tf_idf for index, tf_idf in enumerate(tf_idf_values) if dom_values[index] > .5])
    #ax.plot([min_val, centroids[ind], max_val], [0, 1, 0], linewidth=0.3, color='black')
    #print min_val, max_val
    canvas.print_figure('arun_plot.pdf', dpi=700)
Example #11
def rotate_nested_dict(d, key):
    """Given a dictionary of dictionarties, it builds a new one with keys 
    taken from children's values.
    """
    output = OrderedDict()  #{}
    for item in d:
        if key in d[item]:
            if d[item][key] not in output.keys():
                output[d[item][key]] = [d[item]]
            else:
                output[d[item][key]].append(d[item])
    return output
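A minimal usage sketch for rotate_nested_dict; the record ids, the 'status' key, and the sample values below are hypothetical:

from collections import OrderedDict

records = OrderedDict([
    ('a', {'status': 'open', 'title': 'first'}),
    ('b', {'status': 'closed', 'title': 'second'}),
    ('c', {'status': 'open', 'title': 'third'}),
])
grouped = rotate_nested_dict(records, 'status')
# children end up grouped under the value they hold for 'status'
assert [child['title'] for child in grouped['open']] == ['first', 'third']
assert list(grouped.keys()) == ['open', 'closed']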
Example #12
def rotate_nested_dict(d, key):
    """Given a dictionary of dictionarties, it builds a new one with keys 
    taken from children's values.
    """
    output = OrderedDict()#{}
    for item in d:
        if key in d[item]:
            if d[item][key] not in output.keys():
                output[d[item][key]] = [d[item]]
            else:
                output[d[item][key]].append(d[item])
    return output
Example #13
    def process(self):
        parent_dir = os.path.dirname(self.artifact.canonical_filename())
        input_text = self.artifact.input_text()

        if input_text.find("<!-- endsplit -->") > 0:
            body, footer = re.split("<!-- endsplit -->", input_text, maxsplit=1)
            sections = re.split("<!-- split \"(.+)\" -->\n", body)
            header = sections[0]

            pages = OrderedDict()
            index_content = None
            for i in range(1, len(sections), 2):
                if sections[i] == 'index':
                    index_content = sections[i+1]
                else:
                    section_name = sections[i]
                    # TODO proper url/filename escaping
                    section_url = section_name.replace(" ","-")

                    filename = "%s.html" % section_url
                    filepath = os.path.join(parent_dir, filename)
                    pages[section_name] = filename

                    artifact = self.artifact.__class__(filepath)
                    artifact.ext = '.html'
                    artifact.binary = False
                    artifact.final = True
                    artifact.additional = True
                    artifact.artifacts_dir = self.artifact.artifacts_dir
                    artifact.set_data(header + sections[i+1] + footer)
                    artifact.hashstring = str(uuid.uuid4())
                    artifact.save()

                    self.artifact.inputs()[filepath] = artifact
                    self.log.debug("added key %s to artifact %s ; links to file %s" %
                              (filepath, self.artifact.key, artifact.filename()))

            index_items = []
            for k in sorted(pages.keys()):
                index_items.append("""<li><a href="%s">%s</a></li>""" %
                                   (pages[k], k))

            output_dict = OrderedDict()
            output_dict['header'] = header
            if index_content:
                output_dict['index-page-content'] = index_content
            output_dict['index'] = "<ul>\n%s\n</ul>" % "\n".join(index_items)
            output_dict['footer'] = footer
        else:
            # No endsplit found, do nothing.
            output_dict = self.artifact.input_data_dict
        self.artifact.data_dict = output_dict
Example #14
    def run_pattern_response(self, cmd_args, out_stream=sys.stdout, verbose=True,
                             prefix=None, postfix=None, pattern_response=None):
        """
        Run the external command and interact with it using the pattern_response dictionary
        :param cmd_args: command line arguments
        :param out_stream: stream verbose messages are written to
        :param verbose: output messages if asserted
        :param prefix: command line arguments prepended to the given cmd_args
        :param postfix: command line arguments appended to the given cmd_args
        :param pattern_response: dictionary whose key is a regular expression pattern that when matched
            results in the value being sent to the running process.  If the value is None, then no response is sent.
        :returns: the output of the command
        :rtype: str
        """
        # info("run_pattern_response verbose: %s" % repr(verbose))
        self.display("run_pattern_response(%s)\n\n" % cmd_args, out_stream=out_stream, verbose=verbose)
        if pattern_response is None:
            pattern_response = OrderedDict()
            pattern_response[r'\[\S+\](?<!\[sudo\]) '] = CR    # accept default prompts, don't match "[sudo] "
            if self.password is not None:
                pattern_response[r'\[sudo\] password for \S+\:'] = self.password + CR

        pattern_response[MOVEMENT] = None
        pattern_response[pexpect.TIMEOUT] = CR

        patterns = list(pattern_response.keys())

        args = self.expand_args(cmd_args, prefix=prefix, postfix=postfix)
        command_line = ' '.join(args)
        # self.display("{line}\n\n".format(line=command_line), out_stream=out_stream, verbose=verbose)

        output = []
        try:
            child = pexpect.spawn(command_line)
            while True:
                try:
                    index = child.expect(patterns, timeout=120)
                    self.display(str(child.before), out_stream=out_stream, verbose=verbose)
                    output.append(str(child.before))
                    if child.after:
                        self.display(str(child.after), out_stream=out_stream, verbose=verbose)
                        output.append(str(child.after))

                    key = patterns[index]
                    response = pattern_response[key]
                    if response:
                        child.sendline(response)
                except pexpect.EOF:
                    break
        except pexpect.ExceptionPexpect as ex:
            self.display(str(ex) + '\n', out_stream=out_stream, verbose=verbose)
        return ''.join(output)
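A hypothetical usage sketch for run_pattern_response (the prompt pattern, the command, and the runner instance are invented; CR is the carriage-return constant already used above):

# responses = OrderedDict()
# responses[r'Do you want to continue\? \[Y/n\] '] = 'Y' + CR
# output = runner.run_pattern_response(['apt-get', 'install', 'curl'],
#                                      pattern_response=responses)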
Example #15
    def process(self):
        input_text = self.artifact.input_data.data()

        if input_text.find("<!-- endsplit -->") > 0:
            body, footer = re.split("<!-- endsplit -->", input_text, maxsplit=1)
            sections = re.split("<!-- split \"(.+)\" -->\n", body)
            header = sections[0]

            pages = OrderedDict()
            index_content = None
            for i in range(1, len(sections), 2):
                if sections[i] == 'index':
                    index_content = sections[i+1]
                else:
                    section_name = sections[i]
                    # TODO proper url/filename escaping
                    section_url = section_name.replace(" ","-")

                    filename = "%s.html" % section_url
                    filepath = os.path.join(self.artifact.parent_dir(), filename)
                    pages[section_name] = filename

                    new_page = self.add_doc(filepath, header + sections[i+1] + footer)

                    self.artifact.log.debug("added key %s to artifact %s ; links to file %s" %
                              (filepath, self.artifact.key, new_page.name))

            index_items = []
            for k in sorted(pages.keys()):
                index_items.append("""<li><a href="%s">%s</a></li>""" %
                                   (pages[k], k))

            output = []
            output.append(header)
            if index_content:
                output.append(index_content)

            if self.artifact.args.has_key("split-ul-class"):
                ul = "<ul class=\"%s\">" % self.artifact.args['split-ul-class']
            else:
                ul = "<ul class=\"split\">"

            output.append("%s\n%s\n</ul>" % (ul, "\n".join(index_items)))
            output.append(footer)
        else:
            # No endsplit found, do nothing.
            output = input_text

        self.artifact.output_data.set_data("\n".join(output))
    def addAllPlots(self, graph, results, *args):
        prevYlist = [ 0 ] * len(results)
        plotData = OrderedDict()
        for category in self.labels.keys():
            currYlist = [ summary.get(category, 0) for _, summary in results ]
            if self.hasNonZero(currYlist):
                ylist = [ (currYlist[x] + prevYlist[x]) for x in range(len(prevYlist)) ]
                plotData[category] = prevYlist, ylist
                prevYlist = ylist

        for category in reversed(plotData.keys()):
            prevYlist, ylist = plotData[category]
            if not self.hasNonZero(prevYlist):
                # Adjust the bottom of the graph to avoid a huge block of green for large suites
                prevYlist = [ self.getGraphMinimum(ylist, plotData.values()[-1][-1]) ] * len(ylist)
            self.addPlot(prevYlist, ylist, graph, category=category, *args)
Example #17
class TaskFile(object):
    def __init__(self):
        self._file_name = None
        self._tasks = OrderedDict()

    def __str__(self):
        return '{0}'.format(self._file_name)

    def load(self, file_name):
        self._file_name = file_name
        with open(self._file_name) as f:
            tasks = yaml.load(f)
        if isinstance(tasks, list):
            for task in tasks:
                self._register_task(task)
        elif isinstance(tasks, dict):
            self._register_task(tasks)
        else:
            raise TaskFileError(file_name)

    def _register_task(self, task):
        try:
            name = task['TASK']
        except KeyError:
            raise TaskFileError(self._file_name)

        params = task.get('PARAMS') or []
        if not isinstance(params, list):
            params = shlex.split(params)

        body = task.get('DO') or []
        if not isinstance(body, list):
            body = [body]

        self._tasks[name] = Task(name, params, body, self)

    def get_task(self, task_name=None):
        try:
            if task_name:
                return self._tasks[task_name]
            else:
                return self._tasks[self._tasks.keys()[0]]
        except KeyError:
            raise TaskFileError(self._file_name,
                                'no such task: {0}'.format(task_name))
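A hypothetical usage sketch (it assumes a YAML task file on disk and the Task/TaskFileError classes referenced above):

# tasks.yml might contain, for example:
#   - TASK: build
#     PARAMS: target
#     DO:
#       - make all
#
# task_file = TaskFile()
# task_file.load('tasks.yml')
# build = task_file.get_task('build')   # get_task() with no name returns the first task defined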
Example #18
class TaskFile(object):
    def __init__(self):
        self._file_name = None
        self._tasks = OrderedDict()

    def __str__(self):
        return '{0}'.format(self._file_name)

    def load(self, file_name):
        self._file_name = file_name
        with open(self._file_name) as f:
            tasks = yaml.load(f)
        if isinstance(tasks, list):
            for task in tasks:
                self._register_task(task)
        elif isinstance(tasks, dict):
            self._register_task(tasks)
        else:
            raise TaskFileError(file_name)

    def _register_task(self, task):
        try:
            name = task['TASK']
        except KeyError:
            raise TaskFileError(self._file_name)

        params = task.get('PARAMS') or []
        if not isinstance(params, list):
            params = shlex.split(params)

        body = task.get('DO') or []
        if not isinstance(body, list):
            body = [body]

        self._tasks[name] = Task(name, params, body, self)
        
    def get_task(self, task_name=None):
        try:
            if task_name:
                return self._tasks[task_name]
            else:
                return self._tasks[self._tasks.keys()[0]]
        except KeyError:
            raise TaskFileError(self._file_name, 'no such task: {0}'.format(task_name))
Example #19
class XLSSheetDefinition(object):
    def __init__(self, heading_row=0, fields=None):
        self.heading_row = heading_row
        self.fields = fields if fields is not None else []

    def load_yaml(self, filepath):
        self.fields = OrderedDict()
        with open(filepath, "r") as fp:
            data = yaml.load(fp)
            self.heading_row = data.get("heading_row", self.heading_row)
            for fielddef in data.get("fields", []):
                for name, fdef in fielddef.iteritems():
                    field = XLSField(name)
                    if fdef is not None:
                        for key, val in fdef.iteritems():
                            setattr(field, key, val)
                    self.fields[name] = field

    def names(self):
        return self.fields.keys()
    
    def unique(self):
        return [f for f in self.fields.values() if f.unique]

    def multiple(self):
        return [f for f in self.fields.values() if f.multiple]

    def i18n(self):
        return [f for f in self.fields.values() if f.i18n]

    def required(self):
        return [f for f in self.fields.values() if f.required]

    def date(self):
        return [f for f in self.fields.values() if f.type=="date"]

    def oftype(self, type):
        return [f for f in self.fields.values() if f.type==type]

    def limited(self):
        return [f for f in self.fields.values() if f.limit]

    def choices(self):
        return [f for f in self.fields.values() if f.choices is not None]
class TableEntry:
    "A generic class that holds table-based data"
    def __init__(self, attrs):
        self.__data  = OrderedDict()
        for attr in attrs:
            self.__data[attr] = None

    def attrs(self): 
        return self.__data.keys()

    def update(self, attr, data, allowDup=False):
        if attr in self.__data:
            if self.__data[attr] != None:
                if allowDup:
                    if self.__data[attr] != data:
                        pass
                    #print str("Possible duplicate, exists=" 
                    #+ self.__data[attr] + 
                    #" , new=" + data)
                else:
                    raise Exception("Already updated attribute:" + attr)
            #print attr, data
            self.__data[attr] = data
        else:
            pass
        #print "Skipping attr " + attr
        #raise Exception(str("Bad attribute:" + attr))

    def get(self, attr):
        if attr not in self.attrs():
            raise Exception(str("Bad attribute:" + attr))
        return self.__data[attr]


    def toString(self):
        ret = str()
        tmp = self.attrs()
        ret += str(self.__data[tmp[0]]).replace(",", ";")
        for idx in range(1, len(tmp)):
            ret += str("," + str(self.__data[tmp[idx]]).replace(",",";"))
        ret += "\n"
        return ret
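A small, hypothetical usage sketch for TableEntry (the attribute names are invented; like the rest of this class, it relies on Python 2's dict.keys() returning an indexable list):

entry = TableEntry(["name", "width", "height"])
entry.update("name", "sprite")
entry.update("width", "32")
print(entry.get("name"))    # -> sprite
print(entry.toString())     # -> "sprite,32,None" plus a trailing newline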
Example #21
    def process(self):
        parent_dir = os.path.dirname(self.artifact.canonical_filename())
        input_text = self.artifact.input_text()

        if input_text.find("%% endsplit\n") > 0:
            body, footer = re.split("%% endsplit\n", input_text, maxsplit=1)
            sections = re.split("%% split \"(.+)\"\n", body)
            header = sections[0]

            pages = OrderedDict()
            for i in range(1, len(sections), 2):
                section_name = sections[i]
                # TODO proper url/filename escaping
                section_url = section_name.replace(" ","-")
                source = header + sections[i+1] + footer

                ext = '.tex'

                filename = "%s%s" % (section_url, ext)
                filepath = os.path.join(parent_dir, filename)
                pages[section_name] = filename

                artifact = self.artifact.__class__(filepath)
                artifact.ext = ext
                artifact.binary = False
                artifact.final = True
                artifact.artifacts_dir = self.artifact.artifacts_dir
                artifact.hashstring = str(uuid.uuid4())
                artifact.set_data(source)
                artifact.save()

                self.artifact.inputs()[filepath] = artifact
                self.artifact.log.debug("added key %s to artifact %s ; links to file %s" %
                          (filepath, self.artifact.key, artifact.filename()))

            index_items = []
            for k in sorted(pages.keys()):
                index_items.append("""<li><a href="%s">%s</a></li>""" %
                                   (pages[k], k))

        output_dict = self.artifact.input_data_dict
        self.artifact.data_dict = output_dict
Example #22
    def addAllPlots(self, graph, results, *args):
        prevYlist = [0] * len(results)
        plotData = OrderedDict()
        for category in self.labels.keys():
            currYlist = [summary.get(category, 0) for _, summary in results]
            if self.hasNonZero(currYlist):
                ylist = [(currYlist[x] + prevYlist[x])
                         for x in range(len(prevYlist))]
                plotData[category] = prevYlist, ylist
                prevYlist = ylist

        for category in reversed(plotData.keys()):
            prevYlist, ylist = plotData[category]
            if not self.hasNonZero(prevYlist):
                # Adjust the bottom of the graph to avoid a huge block of green for large suites
                prevYlist = [
                    self.getGraphMinimum(ylist,
                                         plotData.values()[-1][-1])
                ] * len(ylist)
            self.addPlot(prevYlist, ylist, graph, category=category, *args)
Example #23
def qacct_to_dict(text, tasks=False, order_list=None):
    """text is an output from qaccet -j command. When tasks=True, 
    it splits info into per task dictionaries. Returns an OrderedDict class."""
    def getValue(value):
        if value.isdigit():
            value = int(value)
        else:
            try:
                value = float(value)
            except:
                pass
        return value

    def reorder_dict(d, l):
        out = OrderedDict()
        left = []
        for key in l:
            if key in d.keys():
                out[key] = d[key]
        for key in d:
            if key not in out:
                out[key] = d[key]
        return out

    f = text.split(62 * "=")
    out = OrderedDict()
    for job in f:
        j = OrderedDict()
        job = job.split("\n")
        for tag in job:
            tag = tag.strip().split()
            if len(tag) > 1:
                j[tag[0]] = getValue(" ".join(tag[1:]))
        if order_list:
            j = reorder_dict(j, order_list)
        if j.keys():
            if not tasks:
                out[str(j['jobnumber'])] = j
            else:
                out[".".join([str(j['jobnumber']), str(j['taskid'])])] = j
    return out
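A hypothetical usage sketch with a tiny, made-up fragment of qacct -j output:

sample = 62 * "=" + """
jobnumber    1001
taskid       1
ru_wallclock 12.5
"""
jobs = qacct_to_dict(sample)
print(jobs['1001']['ru_wallclock'])   # -> 12.5 (numeric fields are converted to int/float)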
Example #24
def mapCallings(client, data_dir='data', template_root='templates'):
    data_fp = FilePath(data_dir)
    output_root = data_fp.child('output')
    if not output_root.exists():
        output_root.makedirs()
    jenv = Environment(loader=FileSystemLoader(template_root))
    jenv.filters['abbr'] = abbreviateCalling
    jenv.globals['math'] = math
    template = jenv.get_template('callingmap.html')

    #members = client.getRawValue('member_list')
    callings = client.getRawValue('members_with_callings')
    no_calling = client.getRawValue('members_without_callings')
    no_calling = [x for x in no_calling if x['age'] >= 12]

    # get the groups and subgroups organized into dicts
    groups = OrderedDict()
    by_suborg = {}
    for line in prefOrder:
        if line.startswith('#'):
            # heading
            groups[line[1:].strip()] = OrderedDict()
        elif line.strip():
            subgroup_key = line.strip()
            groups[groups.keys()
                   [-1]][subgroup_key] = by_suborg[subgroup_key] = []

    # put each calling into the right subgroup
    # also count the number of callings per person
    calling_counts = defaultdict(lambda: 0)
    for calling in callings:
        suborg = calling['subOrgType'] or calling['organization']
        by_suborg[suborg].append(calling)
        calling_counts[calling['id']] += 1

    fp = output_root.child('callingmap.html')
    fp.setContent(
        template.render(orgs=groups,
                        calling_counts=calling_counts,
                        no_calling=no_calling).encode('utf-8'))
    print 'wrote', fp.path
Example #25
    def init_centroids(self, docs, limit):
        """Pick the initial centroids among the data points;
        the candidates should be stored in self.centroids."""
        # merge all the points
        points = []
        for doc in docs:
            points.extend(doc)
        
        import random
        from ordereddict import OrderedDict
        centroids = []
        weighted_points = {}
        for point in points:
            weight = random.random() * 10000
            weighted_points[point] = weight

        # sort out limit number of candidate centroids
        weighted_points = OrderedDict(sorted(weighted_points.items(), key=lambda d: -d[1]))
        for i in range(limit):
            centroids.append(weighted_points.keys()[i])
        return points, centroids
Example #26
def mapCallings(client, data_dir='data', template_root='templates'):
    data_fp = FilePath(data_dir)
    output_root = data_fp.child('output')
    if not output_root.exists():
        output_root.makedirs()
    jenv = Environment(loader=FileSystemLoader(template_root))
    jenv.filters['abbr'] = abbreviateCalling
    jenv.globals['math'] = math
    template = jenv.get_template('callingmap.html')
    
    #members = client.getRawValue('member_list')
    callings = client.getRawValue('members_with_callings')
    no_calling = client.getRawValue('members_without_callings')
    no_calling = [x for x in no_calling if x['age'] >= 12]

    # get the groups and subgroups organized into dicts
    groups = OrderedDict()
    by_suborg = {}
    for line in prefOrder:
        if line.startswith('#'):
            # heading
            groups[line[1:].strip()] = OrderedDict()
        elif line.strip():
            subgroup_key = line.strip()
            groups[groups.keys()[-1]][subgroup_key] = by_suborg[subgroup_key] = []
    
    # put each calling into the right subgroup
    # also count the number of callings per person
    calling_counts = defaultdict(lambda:0)
    for calling in callings:
        suborg = calling['subOrgType'] or calling['organization']
        by_suborg[suborg].append(calling)
        calling_counts[calling['id']] += 1

    fp = output_root.child('callingmap.html')
    fp.setContent(template.render(
        orgs=groups,
        calling_counts=calling_counts,
        no_calling=no_calling).encode('utf-8'))
    print 'wrote', fp.path
Example #27
def qacct_to_dict(text, tasks=False, order_list=None):
    """text is an output from qaccet -j command. When tasks=True, 
    it splits info into per task dictionaries. Returns an OrderedDict class."""
    def getValue(value):
        if value.isdigit(): 
            value = int(value)
        else:
            try: 
                value = float(value)
            except: 
                pass
        return value
    def reorder_dict(d, l):
        out = OrderedDict()
        left = []
        for key in l:
            if key in d.keys():
                out[key] = d[key]
        for key in d:
            if key not in out:
                out[key] = d[key]
        return out

    f = text.split(62*"=")
    out = OrderedDict()
    for job in f:
        j = OrderedDict()
        job = job.split("\n")
        for tag in job:
            tag = tag.strip().split()
            if len(tag) > 1:
                j[tag[0]] = getValue(" ".join(tag[1:]))
        if order_list: 
            j = reorder_dict(j, order_list)
        if j.keys():
            if not tasks:
                out[str(j['jobnumber'])] = j
            else:
                out[".".join([str(j['jobnumber']), str(j['taskid'])])] = j
    return out
Example #28
def group_member_export(request, group_slug):
    """
    Export all group members for a specific group
    """
    group = get_object_or_404(Group, slug=group_slug)

    # if they can edit it, they can export it
    if not has_perm(request.user, 'user_groups.change_group', group):
        raise Http403

    import xlwt
    from ordereddict import OrderedDict
    from django.db import connection

    # create the excel book and sheet
    book = xlwt.Workbook(encoding='utf8')
    sheet = book.add_sheet('Group Members')

    # the key is what the column will be in the
    # excel sheet. the value is the database lookup
    # Used OrderedDict to maintain the column order
    group_mappings = OrderedDict([
        ('user_id', 'au.id'),
        ('first_name', 'au.first_name'),
        ('last_name', 'au.last_name'),
        ('email', 'au.email'),
        ('receives email', 'pp.direct_mail'),
        ('company', 'pp.company'),
        ('address', 'pp.address'),
        ('address2', 'pp.address2'),
        ('city', 'pp.city'),
        ('state', 'pp.state'),
        ('zipcode', 'pp.zipcode'),
        ('country', 'pp.country'),
        ('phone', 'pp.phone'),
        ('is_active', 'au.is_active'),
        ('date', 'gm.create_dt'),
    ])
    group_lookups = ','.join(group_mappings.values())

    # Use custom SQL to fetch the rows because we need to
    # populate the user profile information, and you cannot
    # do that with Django's ORM without querying the profile
    # for each user. Pulling 13,000 group members can be done
    # in one query with Django's ORM, but then you would need
    # 13,000 individual profile queries :(
    cursor = connection.cursor()
    sql = "SELECT %s FROM user_groups_groupmembership gm \
           INNER JOIN auth_user au ON (au.id = gm.member_id) \
           LEFT OUTER JOIN profiles_profile pp \
           on (pp.user_id = gm.member_id) WHERE group_id = %%s;"

    sql = sql % group_lookups
    cursor.execute(sql, [group.pk])
    values_list = list(cursor.fetchall())

    # Append the heading to the list of values that will
    # go into the excel sheet
    values_list.insert(0, group_mappings.keys())

    # excel date styles
    default_style = xlwt.Style.default_style
    datetime_style = xlwt.easyxf(num_format_str='mm/dd/yyyy hh:mm')
    date_style = xlwt.easyxf(num_format_str='mm/dd/yyyy')

    if values_list:
        # Write the data enumerated to the excel sheet
        for row, row_data in enumerate(values_list):
            for col, val in enumerate(row_data):
                # styles the date/time fields
                if isinstance(val, datetime):
                    style = datetime_style
                elif isinstance(val, date):
                    style = date_style
                else:
                    style = default_style
                sheet.write(row, col, val, style=style)

    response = HttpResponse(content_type='application/vnd.ms-excel')
    response[
        'Content-Disposition'] = 'attachment; filename=group_%s_member_export.xls' % group.pk
    book.save(response)
    return response
 def build_dict(self, res):
     d = OrderedDict()
     for i in res:
         if i['d'] not in d:
             d[i['d']] = i['ex']
     return d
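A hypothetical usage sketch (build_dict keeps only the first 'ex' value seen for each 'd' key; the rows below are invented):

# rows = [{'d': 'host1', 'ex': 1}, {'d': 'host2', 'ex': 2}, {'d': 'host1', 'ex': 3}]
# self.build_dict(rows)   # -> OrderedDict([('host1', 1), ('host2', 2)])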
Example #30
class FormData(object):

    implements(IFormData)

    def __init__(self, data=None):

        if not data:
            data = {}
        super(FormData, self).__init__()
        self._fields = OrderedDict()
        self.from_dict(data)

    def __repr__(self):

        reprlist = ["FormData:", ""]

        for field in self._fields.keys():

            value = self._fields[field].value

            # small hack for fields that have a dict as value (files)
            if isinstance(value, dict):
                if 'name' in value:
                    value = value['name']

            reprlist.append("%s: %s\n" % (field, value))

        return "\n".join(reprlist)

    def __getitem__(self, fieldId):
        """ Always return something... even if the data isn't
        there. This allows for a somewhat lax policy in evaluation of
        requiredness, relevance, etc.
        """

        try:
            return self._fields[fieldId].value
        except:
            return None

    def __setitem__(self, fieldId, val):
        """ Item assignment on formdata. Setting the value of a non existing
        field is NOT an error... """

        if not fieldId in self._fields:
            self._fields[fieldId] = Field(fieldId, val)
        else:
            self._fields[fieldId].value = val

    def getField(self, fieldId):

        return self._fields.get(fieldId, None)

    def addField(self, field):

        self._fields[field.id] = field

    def getFields(self):

        return self._fields.keys()

    def update(self, data, ignore_missing=True):
        """ Update self with fields from data arg """

        for field_id in data.getFields():
            field = data.getField(field_id)
            if self.getField(field_id):
                self.getField(field_id).value = field.value
            else:
                if not ignore_missing:
                    self.addField(Field(field.id, field.value))

    def as_dict(self):

        res = {}

        for field_id in self._fields.keys():

            res[field_id] = self._fields[field_id].value

        return res

    def from_dict(self, data=None):
        """ Set the form fields and values from a dict """
        if data:
            for key, val in data.items():
                self[key] = val
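A hypothetical usage sketch (Field is the field class FormData relies on above):

# data = FormData({'name': 'Jane'})
# data['age'] = 42            # assigning to an unknown field silently creates it
# print(data['missing'])      # -> None; lookups never raise
# print(data.as_dict())       # -> {'name': 'Jane', 'age': 42}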
Example #31
class Element(object):
    def __init__(self,
                 elem_name,
                 primaries,
                 parameter_dict,
                 slurp_flags,
                 fn_list=None,
                 del_fn=None):
        self.elem_name = elem_name
        self.parameter = parameter_dict

        self.primaries = primaries

        self.writables = OrderedDict()
        for fn in fn_list:
            self.writables.update(fn.unique_writables)

        self.write_fns = dict([(fn.name, fn) for fn in fn_list])

        self.del_fn = del_fn
        self.slurp_flags = slurp_flags

    def __repr__(self):
        return '%s, %s, %s, %s' % (self.elem_name, str(
            self.parameter.keys()), str(
                self.primaries), str(self.writables.keys()))

    def required_writables(self):
        """Return a list of the required writable options"""
        return self.writables.keys()

    def required_readables(self):
        """Return a list of the required readable options.

        This includes the primaries at the start of the returned list, in the
        order they are required in and the readable names of the writable
        options at the end."""
        readables = list(self.primaries)
        for option in self.required_writables():
            if option not in readables:
                readables.append(self.writables[option].read_param)
        return readables

    def _slurp(self, req_read_names, sid=-1, flags=None):
        """Return a iterator of the slurped data"""
        if not flags:
            flags = self.slurp_flags

        if app_settings.USE_CASPY:
            return caspy_info(self.elem_name,
                              req_read_names,
                              sid=sid,
                              flags=flags)
        else:
            return subsystem_info(self.elem_name,
                                  req_read_names,
                                  sid=sid,
                                  flags=flags)

    slurp = _slurp

    def needs_transform(self, options):
        for opt in options:
            if self.writables[opt].trns_fn:
                return True

    def map_func(self, read_opts, options):
        if self.needs_transform(options):
            trns = [self.writables[opt_name].trns_fn for opt_name in options]

            def transform(vals):
                return [
                    trns_fn(val) if trns_fn else val
                    for trns_fn, val in zip(trns, vals)
                ]

            if len(read_opts) == len(options):
                # just transform the data:
                def zipper(options, vals):
                    return zip(options, transform(vals))
            else:
                # Some read data needs to be added.
                val_ind = [
                    read_opts.index(self.writables[opt_name].read_param)
                    for opt_name in options
                ]

                def zipper(options, vals):
                    return zip(options,
                               transform([vals[ind] for ind in val_ind]))

            return zipper

        elif len(read_opts) != len(options):
            # Some read data needs to be added.
            val_ind = [
                read_opts.index(self.writables[opt_name].read_param)
                for opt_name in options
            ]

            def zipper(options, vals):
                return zip(options, [vals[ind] for ind in val_ind])

            return zipper
        else:
            # just a regular zipping of files.
            return zip
Example #32
def group_member_export(request, group_slug):
    """
    Export all group members for a specific group
    """
    group = get_object_or_404(Group, slug=group_slug)

    # if they can edit it, they can export it
    if not has_perm(request.user,'user_groups.change_group', group):
        raise Http403

    import xlwt
    from ordereddict import OrderedDict
    from django.db import connection

    # create the excel book and sheet
    book = xlwt.Workbook(encoding='utf8')
    sheet = book.add_sheet('Group Members')
    
    # the key is what the column will be in the
    # excel sheet. the value is the database lookup
    # Used OrderedDict to maintain the column order
    group_mappings = OrderedDict([
        ('user_id', 'au.id'),
        ('first_name', 'au.first_name'),
        ('last_name', 'au.last_name'),
        ('email', 'au.email'),
        ('receives email', 'pp.direct_mail'),
        ('company', 'pp.company'),
        ('address', 'pp.address'),
        ('address2', 'pp.address2'),
        ('city', 'pp.city'),
        ('state', 'pp.state'),
        ('zipcode', 'pp.zipcode'),
        ('country', 'pp.country'),
        ('phone', 'pp.phone'),
        ('is_active', 'au.is_active'),
        ('date', 'gm.create_dt'),
    ])
    group_lookups = ','.join(group_mappings.values())

    # Use custom SQL to fetch the rows because we need to
    # populate the user profile information, and you cannot
    # do that with Django's ORM without calling get_profile()
    # for each user. Pulling 13,000 group members can be done
    # in one query with Django's ORM, but then you would need
    # 13,000 individual profile queries :(
    cursor = connection.cursor()
    sql = "SELECT %s FROM user_groups_groupmembership gm \
           INNER JOIN auth_user au ON (au.id = gm.member_id) \
           LEFT OUTER JOIN profiles_profile pp \
           on (pp.user_id = gm.member_id) WHERE group_id = %%s;"
    sql =  sql % group_lookups
    cursor.execute(sql, [group.pk])
    values_list = list(cursor.fetchall())

    # Append the heading to the list of values that will
    # go into the excel sheet
    values_list.insert(0, group_mappings.keys())
    
    # excel date styles
    default_style = xlwt.Style.default_style
    datetime_style = xlwt.easyxf(num_format_str='mm/dd/yyyy hh:mm')
    date_style = xlwt.easyxf(num_format_str='mm/dd/yyyy')

    if values_list:
        # Write the data enumerated to the excel sheet
        for row, row_data in enumerate(values_list):
            for col, val in enumerate(row_data):
                # styles the date/time fields
                if isinstance(val, datetime):
                    style = datetime_style
                elif isinstance(val, date):
                    style = date_style
                else:
                    style = default_style
                sheet.write(row, col, val, style=style)

    response = HttpResponse(mimetype='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename=group_%s_member_export.xls' % group.pk
    book.save(response)
    return response
Example #33
    def run_pattern_response(self, cmd_args, out_stream=sys.stdout, verbose=True,
                             prefix=None, postfix=None,
                             pattern_response=None, accept_defaults=False,
                             timeout=1200):
        """
        Run the external command and interact with it using the pattern_response dictionary
        :param timeout:
        :param accept_defaults:
        :param cmd_args: command line arguments
        :param out_stream: stream verbose messages are written to
        :param verbose: output messages if asserted
        :param prefix: command line arguments prepended to the given cmd_args
        :param postfix: command line arguments appended to the given cmd_args
        :param pattern_response: dictionary whose key is a regular expression pattern that when matched
            results in the value being sent to the running process.  If the value is None, then no response is sent.
        """
        pattern_response_dict = OrderedDict(pattern_response or {})

        if accept_defaults:
            sudo_pattern = 'password for {user}: '.format(user=Project.user)
            sudo_response = "{password}\r".format(password=Project.password)
            pattern_response_dict[sudo_pattern] = sudo_response
            # accept default prompts, don't match "[sudo] "
            pattern_response_dict[r'\[\S+\](?<!\[sudo\])(?!\S)'] = CR

        pattern_response_dict[MOVEMENT] = None
        pattern_response_dict[pexpect.TIMEOUT] = None

        patterns = list(pattern_response_dict.keys())
        patterns.append(self.ssh.PROMPT)

        args = self.expand_args(cmd_args, prefix=prefix, postfix=postfix)
        command_line = ' '.join(args)
        # info("pattern_response_dict => %s" % repr(pattern_response_dict))
        # self.display("{line}\n".format(line=command_line), out_stream=out_stream, verbose=verbose)

        output = []

        self.ssh.prompt(timeout=0.1)     # clear out any pending prompts
        self._report(output, out_stream=out_stream, verbose=verbose)
        self.ssh.sendline(command_line)
        while True:
            try:
                index = self.ssh.expect(patterns)
                if index == patterns.index(pexpect.TIMEOUT):
                    print("ssh.expect TIMEOUT")
                else:
                    self._report(output, out_stream=out_stream, verbose=verbose)
                    if index == patterns.index(self.ssh.PROMPT):
                        break

                    key = patterns[index]
                    response = pattern_response_dict[key]
                    if response:
                        sleep(0.1)
                        self.ssh.sendline(response)
            except pexpect.EOF:
                self._report(output, out_stream=out_stream, verbose=verbose)
                break
        self.ssh.prompt(timeout=0.1)
        self._report(output, out_stream=out_stream, verbose=verbose)
        return ''.join(output).splitlines()
Example #34
class ResourceBuilder(object):
    """ Helper to create a ressource """
    def __init__(self, name=None, required=False):
        self._name = name
        self._fields = OrderedDict()
        self._required = required

    def add_field(self,
                  field,
                  arg=None,
                  value=None,
                  extended=False,
                  hidden=False,
                  e_type=str,
                  required=None):
        """Add a new field to the current ResourceBuilder.

           Keyword arguments:
           field    -- field name
           arg      -- name of the attribute name in arg object (argparse)
           value    -- a default for this field, used for resource creation.
           extended -- If set to true, the current field will be display in
                       extended list mode only.
           hidden   -- If set to true, the current field won't be exposed
                       as available keys.
           e_type   -- field data type (default str)
           required -- True if the current field is required for create
                       and update methods
        """
        if required is None:
            required = self._required
        if arg is None:
            arg = re.sub('(?!^)([A-Z]+)', r'_\1', field).lower()
        self._fields[field] = {
            'field': field,
            'arg': arg,
            'value': value,
            'extended': extended,
            'required': required,
            'e_type': e_type,
            'hidden': hidden
        }

    def get_keys(self, extended=False):
        res = []
        for field in self._fields.values():
            if field['hidden']:
                continue
            if not field['extended']:
                res.append(field['field'])
            if extended and field['extended']:
                res.append(field['field'])
        return res

    def get_fields(self, extended=False, full=False):
        res = []
        if extended:
            for field in self._fields.values():
                if field['extended']:
                    res.append(field['field'])
        elif full:
            for field in self._fields.keys():
                res.append(field)
        else:
            for field in self._fields.values():
                if not field['extended']:
                    res.append(field['field'])
        return res

    def set_arg(self, key, arg):
        field = self._fields.get(key, None)
        if field is not None:
            field['arg'] = arg

    def get_value(self, key):
        field = self._fields.get(key, None)
        if field is not None:
            return field['value']
        else:
            return None

    def set_value(self, key, value):
        field = self._fields.get(key, None)
        if field is not None:
            field['value'] = value

    def to_resource(self):
        ret = {}
        for field in self._fields.values():
            ret[field['field']] = field['value']
        return ret

    def load_from_args(self, namespace):
        for field in self._fields.values():
            value = getattr(namespace, field['arg'], None)
            if value is not None:
                field['value'] = value

    def copy(self, data):
        if isinstance(data, dict):
            for field, val in self._fields.items():
                val['value'] = data.get(field, "")

        if isinstance(data, ResourceBuilder):
            for field, val in self._fields.items():
                val['value'] = data[field]['value']

    def __str__(self):
        return json.dumps(self.to_resource(), sort_keys=True, indent=2)

    def check_required_fields(self):
        for field in self._fields.values():
            if field['required']:
                value = field['value']
                if value is None:
                    raise ValueError("missing value for required field : " +
                                     field['field'])
                e_type = field['e_type']
                if e_type == int:
                    int(value)
                if e_type == float:
                    float(value)
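A minimal, hypothetical usage sketch for ResourceBuilder; the field names and values are invented:

builder = ResourceBuilder("user")
builder.add_field("firstName", value="Jane")
builder.add_field("mail", required=True)
builder.set_value("mail", "jane@example.com")
builder.check_required_fields()   # raises ValueError if a required value is missing
print(builder.get_keys())         # -> ['firstName', 'mail']
print(builder.to_resource())      # -> {'firstName': 'Jane', 'mail': 'jane@example.com'}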
Example #35
class Book:
    """
    Read an Excel book that has multiple sheets

    For a csv file, there will be just one sheet
    """
    def __init__(self, filename=None, **keywords):
        """
        Book constructor

        Selecting a specific book according to filename extension
        """
        self.path = ""
        self.filename = "memory"
        self.name_array = []
        self.sheets = {}
        if is_string(type(filename)):
            if filename and os.path.exists(filename):
                self.load_from(filename, **keywords)
        elif isinstance(filename, tuple):
            self.load_from_memory(filename, **keywords)

    def load_from(self, file, **keywords):
        """Load content from physical file

        :param str file: the file name
        :param any keywords: additional parameters
        """
        path, filename = os.path.split(file)
        self.path = path
        self.filename = filename
        book = load_file(file, **keywords)
        sheets = book.sheets()
        self.load_from_sheets(sheets)

    def load_from_memory(self, the_tuple, **keywords):
        """Load content from memory content

        :param tuple the_tuple: first element should be file extension,
        second element should be file content
        :param any keywords: additional parameters
        """
        book = load_file(the_tuple, **keywords)
        sheets = book.sheets()
        self.load_from_sheets(sheets)

    def load_from_sheets(self, sheets):
        """Load content from existing sheets

        :param dict sheets: a dictionary of sheets. Each sheet is
        a list of lists
        """
        self.sheets = OrderedDict()
        for name in sheets.keys():
            self.sheets[name] = self.get_sheet(sheets[name], name)
        self.name_array = list(self.sheets.keys())

    def get_sheet(self, array, name):
        """Create a sheet from a list of lists"""
        return Sheet(array, name)

    def __iter__(self):
        return SheetIterator(self)

    def number_of_sheets(self):
        """Return the number of sheets"""
        return len(self.name_array)

    def sheet_names(self):
        """Return all sheet names"""
        return self.name_array

    def sheet_by_name(self, name):
        """Get the sheet with the specified name"""
        return self.sheets[name]

    def sheet_by_index(self, index):
        """Get the sheet with the specified index"""
        if index < len(self.name_array):
            sheet_name = self.name_array[index]
            return self.sheets[sheet_name]

    def remove_sheet(self, sheet):
        if isinstance(sheet, int):
            if sheet < len(self.name_array):
                sheet_name = self.name_array[sheet]
                del self.sheets[sheet_name]
                self.name_array = list(self.sheets.keys())
            else:
                raise IndexError
        elif isinstance(sheet, str):
            if sheet in self.name_array:
                del self.sheets[sheet]
                self.name_array = list(self.sheets.keys())
            else:
                raise KeyError
        else:
            raise TypeError

    def __getitem__(self, key):
        if type(key) == int:
            return self.sheet_by_index(key)
        else:
            return self.sheet_by_name(key)

    def __delitem__(self, other):
        self.remove_sheet(other)
        return self

    def __add__(self, other):
        """Operator overloading

        example::

            book3 = book1 + book2
            book3 = book1 + book2["Sheet 1"]

        """
        content = {}
        a = to_dict(self)
        for k in a.keys():
            new_key = k
            if len(a.keys()) == 1:
                new_key = "%s_%s" % (self.filename, k)
            content[new_key] = a[k]
        if isinstance(other, Book):
            b = to_dict(other)
            for l in b.keys():
                new_key = l
                if len(b.keys()) == 1:
                    new_key = other.filename
                if new_key in content:
                    uid = uuid.uuid4().hex
                    new_key = "%s_%s" % (l, uid)
                content[new_key] = b[l]
        elif isinstance(other, Sheet):
            new_key = other.name
            if new_key in content:
                uid = uuid.uuid4().hex
                new_key = "%s_%s" % (other.name, uid)
            content[new_key] = other.array
        else:
            raise TypeError
        c = Book()
        c.load_from_sheets(content)
        return c

    def __iadd__(self, other):
        """Operator overloading +=

        example::

            book += book2
            book += book2["Sheet1"]
        
        """
        if isinstance(other, Book):
            names = other.sheet_names()
            for name in names:
                new_key = name
                if len(names) == 1:
                    new_key = other.filename
                if new_key in self.name_array:
                    uid = uuid.uuid4().hex
                    new_key = "%s_%s" % (name, uid)
                self.sheets[new_key] = self.get_sheet(other[name].array,
                                                      new_key)
        elif isinstance(other, Sheet):
            new_key = other.name
            if new_key in self.name_array:
                uid = uuid.uuid4().hex
                new_key = "%s_%s" % (other.name, uid)
            self.sheets[new_key] = self.get_sheet(other.array, new_key)
        else:
            raise TypeError
        self.name_array = list(self.sheets.keys())
        return self
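    # --- Usage sketch (editor's illustration, not part of the original source) ---
    # Assuming two existing workbook files "a.xls" and "b.csv" and the module's
    # own helpers (load_file, Sheet, SheetIterator, to_dict, ...) are importable:
    #
    #     book1 = Book("a.xls")
    #     book2 = Book("b.csv")        # a csv book holds exactly one sheet
    #     merged = book1 + book2       # new Book; name clashes get a uuid suffix
    #     merged += book1["Sheet1"]    # append a single Sheet in place
    #     for sheet in merged:         # SheetIterator yields each Sheet in order
    #         print sheet.name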
Exemple #36
0
class Atoms:
    "Class to deal with a single frame of an xyz movie"

    def __init__(self, filename=None, *allocargs, **allockwargs):

        self._atomsptr = None
        self.alloc(*allocargs, **allockwargs)

        if filename is not None:
            self.read(filename)

    def alloc(self, n=0, n_int=0, n_real=3, n_str=1, n_logical=0, use_libatoms=False, atomsptr=None, properties=None, \
                 lattice=numpy.array([[100.,0.,0.],[0.,100.,0.],[0.,0.,100.]]), \
                 params=ParamReader(),element='Si'):

        if use_libatoms or atomsptr is not None:
            if atomsptr is None:
                self.attach(libatoms.atoms_initialise(n, lattice))
            else:
                self.attach(atomsptr)
        else:
            self.n = n
            self.lattice = lattice
            self.g = numpy.linalg.inv(self.lattice)
            self.params = params

            # Create single property for atomic positions
            self.real = numpy.zeros((self.n, n_real), dtype=float)
            self.int = numpy.zeros((self.n, n_int), dtype=int)
            self.str = numpy.zeros((self.n, n_str), dtype='S10')
            self.logical = numpy.zeros((self.n, n_logical), dtype=bool)

            if properties is None:
                self.properties = OrderedDict({
                    'species': ('S', slice(0, 1)),
                    'pos': ('R', slice(0, 3))
                })
            else:
                self.properties = properties

            self.repoint()

    def attach(self, atomsptr):
        self.finalise()
        self._atomsptr = atomsptr

        self.n, n_int, n_real, n_str, n_logical, iloc, rloc, sloc, lloc, latticeloc, gloc = \
                   libatoms.atoms_get_data(self._atomsptr)

        self.int = arraydata((self.n, n_int), int, iloc)
        self.real = arraydata((self.n, n_real), float, rloc)
        self.str = arraydata((self.n, n_str), 'S10', sloc)
        self.logical = arraydata((self.n, n_logical), bool, lloc)

        self.lattice = arraydata((3, 3), float, latticeloc)
        self.g = arraydata((3, 3), float, gloc)

        self.params = {}

        property_code_map = {1: 'I', 2: 'R', 3: 'S', 4: 'L'}
        self.properties = OrderedDict()
        for i in range(libatoms.atoms_n_properties(self._atomsptr)):
            key, (code, startcol, stopcol) = libatoms.atoms_nth_property(
                self._atomsptr, i + 1)
            self.properties[key.strip()] = (property_code_map[code],
                                            slice(startcol - 1, stopcol))

        self.repoint()

    def finalise(self):
        if self._atomsptr is not None:
            libatoms.atoms_finalise(self._atomsptr)
            self._atomsptr = None

    def __repr__(self):
        return 'Atoms(n=%d, properties=%s, params=%s, lattice=%s)' % \
              (self.n, repr(self.properties), repr(self.params), repr(self.lattice))

    def __cmp__(self, other):
        if other is None:
            return 1

        # Quick checks
        if (self.n != other.n) or (self.comment() != other.comment()):
            return 1

        # Check if arrays match one by one
        for this, that in \
            (self.lattice, other.lattice), \
            (self.real, other.real), (self.int, other.int), \
            (self.str, other.str), (self.logical, other.logical):

            if (not numpy.all(this == that)):
                return 1

        return 0

    def update(self, other):
        "Overwrite contents of this Atoms object with a copy of an other"
        self.n = other.n
        self.lattice = other.lattice.copy()
        self.g = other.g.copy()
        self.params = other.params.copy()
        self.properties = other.properties.copy()

        self.real = other.real[:]
        self.int = other.int[:]
        self.str = other.str[:]
        self.logical = other.logical[:]

        self.repoint()

    def add_property(self, name, value, ncols=1):
        "Add a new property to this Atoms object. Value can be a scalar int or float, or an array."

        # Scalar int or list of all ints
        if (type(value) == type(0)) or \
               ((type(value) == type([])) and numpy.all(numpy.array(map(type,value)) == type(0))):
            n_int = self.int.shape[1]
            intcopy = self.int.copy()
            self.int = numpy.zeros((self.n, n_int + ncols), dtype=int)
            self.int[:, :n_int] = intcopy
            if ncols == 1:
                self.int[:, n_int] = value
            else:
                self.int[:, n_int:n_int + ncols] = value
            self.properties[name] = ('I', slice(n_int, n_int + ncols))
            self.repoint()

        # Scalar real or list of all reals
        elif (type(value) == type(0.0)) or \
                 (type(value) == type([]) and numpy.all(numpy.array(map(type,value)) == type(0.0))):
            n_real = self.real.shape[1]
            realcopy = self.real.copy()
            self.real = numpy.zeros((self.n, n_real + ncols), dtype=float)
            self.real[:, :n_real] = realcopy
            if ncols == 1:
                self.real[:, n_real] = value
            else:
                self.real[:, n_real:n_real + ncols] = value
            self.properties[name] = ('R', slice(n_real, n_real + ncols))
            self.repoint()

        # Scalar string or list of strings
        elif (type(value) == type('')) or \
               ((type(value) == type([])) and numpy.all(numpy.array(map(type,value)) == type(''))):
            n_str = self.str.shape[1]
            strcopy = self.str.copy()
            self.str = numpy.zeros((self.n, n_str + ncols), dtype='S10')
            self.str[:, :n_str] = strcopy
            if ncols == 1:
                self.str[:, n_str] = value
            else:
                self.str[:, n_str:n_str + ncols] = value
            self.properties[name] = ('S', slice(n_str, n_str + ncols))
            self.repoint()

        # Scalar logical or list of logicals
        elif (type(value) == type(False)) or \
               ((type(value) == type([])) and numpy.all(numpy.array(map(type,value)) == type(False))):
            n_logical = self.logical.shape[1]
            logicalcopy = self.logical.copy()
            self.logical = numpy.zeros((self.n, n_logical + ncols), dtype=bool)
            self.logical[:, :n_logical] = logicalcopy
            if ncols == 1:
                self.logical[:, n_logical] = value
            else:
                self.logical[:, n_logical:n_logical + ncols] = value
            self.properties[name] = ('L', slice(n_logical, n_logical + ncols))
            self.repoint()

        # Array type
        elif type(value) == type(numpy.array([])):
            if value.shape[0] != self.n:
                raise ValueError('length of value array (%d) != number of atoms (%d)' % \
                                 (value.shape[0],self.n))

            if value.dtype.kind == 'f':
                try:
                    ncols = value.shape[1]
                except IndexError:
                    ncols = 1
                n_real = self.real.shape[1]
                realcopy = self.real.copy()
                self.real = numpy.zeros((self.n, n_real + ncols), dtype=float)
                self.real[:, :n_real] = realcopy
                if ncols == 1:
                    self.real[:, n_real] = value.copy()
                else:
                    self.real[:, n_real:n_real + ncols] = value.copy()
                self.properties[name] = ('R', slice(n_real, n_real + ncols))
                self.repoint()
            elif value.dtype.kind == 'i':
                try:
                    ncols = value.shape[1]
                except IndexError:
                    ncols = 1
                n_int = self.int.shape[1]
                intcopy = self.int.copy()
                self.int = numpy.zeros((self.n, n_int + ncols), dtype=int)
                self.int[:, :n_int] = intcopy

                if ncols == 1:
                    self.int[:, n_int] = value.copy()
                else:
                    self.int[:, n_int:n_int + ncols] = value.copy()
                self.properties[name] = ('I', slice(n_int, n_int + ncols))
                self.repoint()

            elif value.dtype.kind == 'S':
                try:
                    ncols = value.shape[1]
                except IndexError:
                    ncols = 1
                n_str = self.str.shape[1]
                strcopy = self.str.copy()
                self.str = numpy.zeros((self.n, n_str + ncols), dtype='S10')
                self.str[:, :n_str] = strcopy

                if ncols == 1:
                    self.str[:, n_str] = value.copy()
                else:
                    self.str[:, n_str:n_str + ncols] = value.copy()
                self.properties[name] = ('S', slice(n_str, n_str + ncols))
                self.repoint()

            elif value.dtype == numpy.dtype('bool'):
                try:
                    ncols = value.shape[1]
                except IndexError:
                    ncols = 1
                n_logical = self.logical.shape[1]
                logicalcopy = self.logical.copy()
                self.logical = numpy.zeros((self.n, n_logical + ncols),
                                           dtype=numpy.dtype('bool'))
                self.logical[:, :n_logical] = logicalcopy

                if ncols == 1:
                    self.logical[:, n_logical] = value.copy()
                else:
                    self.logical[:, n_logical:n_logical + ncols] = value.copy()
                self.properties[name] = ('L',
                                         slice(n_logical, n_logical + ncols))
                self.repoint()

            else:
                raise ValueError(
                    "Don't know how to add array property of type %r" %
                    value.dtype)

        else:
            raise ValueError("Don't know how to add property of type %r" %
                             type(value))

    def repoint(self):
        "Make pointers to columns in real and int"

        for prop, (ptype, cols) in self.properties.items():
            if ptype == 'R':
                if cols.stop - cols.start == 1:
                    setattr(self, prop, self.real[:, cols.start])
                else:
                    setattr(self, prop, self.real[:, cols])
            elif ptype == 'I':
                if cols.stop - cols.start == 1:
                    setattr(self, prop, self.int[:, cols.start])
                else:
                    setattr(self, prop, self.int[:, cols])
            elif ptype == 'S':
                if cols.stop - cols.start == 1:
                    setattr(self, prop, self.str[:, cols.start])
                else:
                    setattr(self, prop, self.str[:, cols])
            elif ptype == 'L':
                if cols.stop - cols.start == 1:
                    setattr(self, prop, self.logical[:, cols.start])
                else:
                    setattr(self, prop, self.logical[:, cols])
            else:
                raise ValueError('Bad property type :' +
                                 str(self.properties[prop]))

    def comment(self, properties=None):
        "Return the comment line for this Atoms object"

        if properties is None:
            props = self.properties.keys()
        else:
            props = properties

        lattice_str = 'Lattice="' + ' '.join(
            map(str, numpy.reshape(self.lattice, 9))) + '"'

        props_str = 'Properties=' + ':'.join(map(':'.join, \
                 zip(props, \
                     [self.properties[k][0] for k in props], \
                     [str(self.properties[k][1].stop-self.properties[k][1].start) for k in props])))

        return lattice_str + ' ' + props_str + ' ' + str(self.params)
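        # For illustration (assuming the default 100 A cubic lattice and the
        # default 'species'/'pos' properties; trailing params omitted), the
        # string returned above looks like:
        #   Lattice="100.0 0.0 0.0 0.0 100.0 0.0 0.0 0.0 100.0" Properties=species:S:1:pos:R:3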

    def _props_dtype(self, props=None):
        "Return a record array dtype for the specified properties (default all)"

        if props is None:
            props = self.properties.keys()

        result = []
        fmt_map = {'R': 'd', 'I': 'i', 'S': 'S10', 'L': 'bool'}

        for prop in props:
            ptype, cols = self.properties[prop]
            if cols.start == cols.stop - 1:
                result.append((prop, fmt_map[ptype]))
            else:
                for c in range(cols.stop - cols.start):
                    result.append((prop + str(c), fmt_map[ptype]))

        return numpy.dtype(result)

    def to_recarray(self, props=None):
        "Return a record array contains specified properties in order (defaults to all properties)"

        if props is None:
            props = self.properties.keys()

        # Create empty record array with correct dtype
        data = numpy.zeros(self.n, self._props_dtype(props))

        # Copy cols from self.real and self.int into data recarray
        for prop in props:
            ptype, cols = self.properties[prop]
            if ptype == 'R':
                if cols.start == cols.stop - 1:
                    data[prop] = self.real[:, cols.start]
                else:
                    for c in range(cols.stop - cols.start):
                        data[prop + str(c)] = self.real[:, cols.start + c]
            elif ptype == 'I':
                if cols.start == cols.stop - 1:
                    data[prop] = self.int[:, cols.start]
                else:
                    for c in range(cols.stop - cols.start):
                        data[prop + str(c)] = self.int[:, cols.start + c]
            elif ptype == 'S':
                if cols.start == cols.stop - 1:
                    data[prop] = self.str[:, cols.start]
                else:
                    for c in range(cols.stop - cols.start):
                        data[prop + str(c)] = self.str[:, cols.start + c]
            elif ptype == 'L':
                if cols.start == cols.stop - 1:
                    data[prop] = self.logical[:, cols.start]
                else:
                    for c in range(cols.stop - cols.start):
                        data[prop + str(c)] = self.logical[:, cols.start + c]
            else:
                raise ValueError('Bad property type :' +
                                 str(self.properties[prop][1]))

        return data

    def update_from_recarray(self, data, props=None):
        """Update Atoms data from a record array. By default all properties
      are updated; use the props argument to update only a subset"""

        if props is None:
            props = self.properties.keys()

        if data.dtype != self._props_dtype(props) or data.shape != (self.n, ):
            raise ValueError('Data shape is incorrect')

        # Copy cols from data recarray into self.real and self.int
        for prop in props:
            ptype, cols = self.properties[prop]
            if ptype == 'R':
                if cols.start == cols.stop - 1:
                    self.real[:, cols.start] = data[prop]
                else:
                    for c in range(cols.stop - cols.start):
                        self.real[:, cols.start + c] = data[prop + str(c)]
            elif ptype == 'I':
                if cols.start == cols.stop - 1:
                    self.int[:, cols.start] = data[prop]
                else:
                    for c in range(cols.stop - cols.start):
                        self.int[:, cols.start + c] = data[prop + str(c)]
            elif ptype == 'S':
                if cols.start == cols.stop - 1:
                    self.str[:, cols.start] = data[prop]
                else:
                    for c in range(cols.stop - cols.start):
                        self.str[:, cols.start + c] = data[prop + str(c)]
            elif ptype == 'L':
                if cols.start == cols.stop - 1:
                    self.logical[:, cols.start] = data[prop]
                else:
                    for c in range(cols.stop - cols.start):
                        self.logical[:, cols.start + c] = data[prop + str(c)]
            else:
                raise ValueError('Bad property type :' +
                                 str(self.properties[prop][1]))
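    # Round-trip sketch for to_recarray()/update_from_recarray() (editor's
    # illustration; assumes an initialised Atoms object ``at`` with the default
    # 'species'/'pos' properties):
    #     rec = at.to_recarray(['species', 'pos'])
    #     rec['pos0'] += 0.1                         # shift every x coordinate
    #     at.update_from_recarray(rec, ['species', 'pos'])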

    def read_xyz(self, xyz):
        "Read from extended XYZ filename or open file."

        opened = False
        if type(xyz) == type(''):
            xyz = open(xyz, 'r')
            opened = True

        line = xyz.next()
        if not line: return False

        n = int(line.strip())
        comment = (xyz.next()).strip()

        # Parse comment line
        params = ParamReader(comment)

        if not 'Properties' in params:
            raise ValueError('Properties missing from comment line')

        properties, n_int, n_real, n_str, n_logical = _parse_properties(
            params['Properties'])
        del params['Properties']

        # Get lattice
        if not 'Lattice' in params:
            raise ValueError('No lattice found in xyz file')

        lattice = numpy.reshape(params['Lattice'], (3, 3))
        del params['Lattice']

        self.alloc(n=n,lattice=lattice,properties=properties,params=params,\
                   n_int=n_int,n_real=n_real,n_str=n_str,n_logical=n_logical)

        props_dtype = self._props_dtype()

        converters = [_getconv(props_dtype.fields[name][0]) \
                      for name in props_dtype.names]

        X = []
        for i, line in enumerate(xyz):
            vals = line.split()
            row = tuple([converters[j](val) for j, val in enumerate(vals)])
            X.append(row)
            if i == self.n - 1: break  # Only read self.n lines

        try:
            data = numpy.array(X, props_dtype)
        except TypeError:
            raise IOError('End of file reached before end of frame')

        if opened: xyz.close()

        try:
            self.update_from_recarray(data)
        except ValueError:
            # got a partial frame, must be end of file
            return False
        else:
            return True

    def read_netcdf(self, fname, frame=0):
        from pupynere import netcdf_file

        nc = netcdf_file(fname)

        self.n = nc.dimensions['atom']
        self.lattice = make_lattice(nc.variables['cell_lengths'][frame],
                                    nc.variables['cell_angles'][frame])
        self.g = numpy.linalg.inv(self.lattice)
        self.params = OrderedDict()
        self.properties = OrderedDict()

        self.real = numpy.zeros((self.n, 0), dtype=float)
        self.int = numpy.zeros((self.n, 0), dtype=int)
        self.str = numpy.zeros((self.n, 0), dtype='S10')
        self.logical = numpy.zeros((self.n, 0), dtype=bool)

        vars = nc.variables.keys()
        vars = filter(lambda v: not v in ('cell_angles', 'cell_lengths'), vars)

        # ensure first var is species and second positions
        sp = vars.index('species')
        if sp != 0:
            vars[sp], vars[0] = vars[0], vars[sp]
        pos = vars.index('coordinates')
        if pos != 1:
            vars[pos], vars[1] = vars[1], vars[pos]

        for v in vars:
            d = nc.variables[v].dimensions

            if d[0] != 'frame': continue

            value = nc.variables[v][frame]
            if value.dtype == numpy.dtype('|S1'):
                value = [''.join(x).strip() for x in value]

            if len(d) == 1 or (len(d) == 2 and d[1] in ('label', 'string')):
                if (len(d) == 2 and d[1] in ('label', 'string')):
                    value = ''.join(value)
                self.params[v] = value
            else:
                # Name mangling
                if v == 'coordinates':
                    p = 'pos'
                elif v == 'velocities':
                    p = 'velo'
                else:
                    p = v
                value = nc.variables[v][frame]
                if value.dtype == numpy.dtype('|S1'):
                    value = [''.join(x).strip() for x in value]
                self.add_property(p, value)

    def write_xyz(self, xyz=sys.stdout, properties=None):
        "Write atoms in extended XYZ format. xyz can be a filename or open file"

        if properties is None:
            # Sort by original order
            props = self.properties.keys()
        else:
            props = properties

        species = getattr(self, props[0])
        if len(species.shape) != 1 or species.dtype.kind != 'S':
            raise ValueError('First property must be species like')

        pos = getattr(self, props[1])
        if pos.shape[1] != 3 or pos.dtype.kind != 'f':
            raise ValueError('Second property must be position like')

        data = self.to_recarray(props)
        format = ''.join(
            [_getfmt(data.dtype.fields[name][0])
             for name in data.dtype.names]) + '\n'

        opened = False
        if type(xyz) == type(''):
            xyz = open(xyz, 'w')
            opened = True

        xyz.write('%d\n' % self.n)
        xyz.write(self.comment(properties) + '\n')
        for i in range(self.n):
            xyz.write(format % tuple(data[i]))

        if opened: xyz.close()

    def read_cell(self, cell):
        "Read atoms from a CastepCell object or file"

        if hasattr(cell, 'next'):  # looks like a file
            cell = castep.CastepCell(cell)

        self.update(cell.to_atoms())

    def write_cell(self, fname):
        "Write Atoms to a cell file"

        cell = castep.CastepCell()
        cell.update_from_atoms(self)
        cell.write(fname)

    def read_geom(self, geom):
        "Read from a CASTEP .geom file"
        self.update(castep.read_geom(geom))

    def read_castep(self, castepfile):
        "Read from a .castep output file"

        if self.n != 0:
            self.update(
                castep.read_castep_output(castepfile, self, abort=False))
        else:
            self.update(castep.read_castep_output(castepfile, abort=False))

    def read(self, fname, filetype=None):
        "Attempt to guess type of file from extension and call appropriate read method"

        opened = False
        if type(fname) == type(''):
            if fname.endswith('.gz'):
                import gzip
                fh = gzip.open(fname)
                fname = fname[:-3]  # remove .gz
            elif fname.endswith('.nc'):
                fh = fname
            else:
                fh = open(fname, 'r')
                opened = True

            # Guess file type from extension
            if filetype is None:
                root, filetype = os.path.splitext(fname)
                filetype = filetype[1:]  # remove '.'
        else:
            fh = fname

        # Default to xyz format
        if not filetype in ['cell', 'geom', 'xyz', 'castep', 'nc']:
            filetype = 'xyz'

        if filetype == 'xyz':
            self.read_xyz(fh)
        elif filetype == 'cell':
            self.read_cell(fh)
        elif filetype == 'geom':
            self.read_geom(fh)
        elif filetype == 'castep':
            self.read_castep(fh)
        elif filetype == 'nc':
            self.read_netcdf(fh)

        if opened: fh.close()

    def write(self, fname, filetype=None):
        opened = False
        if type(fname) == type(''):
            if fname.endswith('.gz'):
                import gzip
                fh = gzip.open(fname, 'w')
                fname = fname[:-3]  # remove .gz
            else:
                fh = open(fname, 'w')

            # Guess file type from extension
            if filetype is None:
                root, filetype = os.path.splitext(fname)
                filetype = filetype[1:]  # remove '.'
            opened = True
        else:
            fh = fname

        # Default to xyz format
        if not filetype in ['xyz', 'cfg', 'cell']:
            filetype = 'xyz'

        if filetype == 'xyz':
            self.write_xyz(fh)
        elif filetype == 'cfg':
            self.write_cfg(fh)
        elif filetype == 'cell':
            self.write_cell(fh)

        if opened: fh.close()

    def write_cfg(self,
                  cfg=sys.stdout,
                  shift=numpy.array([0., 0., 0.]),
                  properties=None):
        """Write atoms in AtomEye extended CFG format. Returns a list of auxiliary properties
      actually written to CFG file, which may be abbreviated compared to those requested since
      AtomEye has a maximum of 32 aux props."""

        opened = False
        if type(cfg) == type(''):
            cfg = open(cfg, 'w')
            opened = True

        if properties is None:
            properties = self.properties.keys()

        # Header line
        cfg.write('Number of particles = %d\n' % self.n)
        cfg.write('# ' + self.comment(properties) + '\n')

        # Lattice vectors
        for i in 0, 1, 2:
            for j in 0, 1, 2:
                cfg.write('H0(%d,%d) = %16.8f\n' %
                          (i + 1, j + 1, self.lattice[i, j]))

        cfg.write('.NO_VELOCITY.\n')

        # Check first property is position-like
        species = getattr(self, properties[0])
        if len(species.shape) != 1 or species.dtype.kind != 'S':
            raise ValueError('First property must be species like')

        pos = getattr(self, properties[1])
        if pos.shape[1] != 3 or pos.dtype.kind != 'f':
            raise ValueError('Second property must be position like')

        if not self.properties.has_key('frac_pos'):
            self.add_property('frac_pos', 0.0, ncols=3)
        self.frac_pos[:] = numpy.array(
            [numpy.dot(pos[i, :], self.g) + shift for i in range(self.n)])

        if not self.properties.has_key('mass'):
            self.add_property('mass', map(ElementMass.get, self.species))

        properties = filter(
            lambda p: p not in ('pos', 'frac_pos', 'mass', 'species'),
            properties)

        # AtomEye can handle a maximum of 32 columns, so we might have to throw away
        # some of the less interesting properties

        def count_cols():
            n_aux = 0
            for p in properties:
                s = getattr(self, p).shape
                if len(s) == 1: n_aux += 1
                else: n_aux += s[1]
            return n_aux

        boring_properties = ['travel', 'avgpos', 'oldpos', 'acc', 'velo']
        while count_cols() > 32:
            if len(boring_properties) == 0:
                raise ValueError('No boring properties left!')
            try:
                next_most_boring = boring_properties.pop(0)
                del properties[properties.index(next_most_boring)]
            except ValueError:
                pass  # this boring property isn't in the list: move on to next

        properties = ['species', 'mass', 'frac_pos'] + properties
        data = self.to_recarray(properties)

        cfg.write('entry_count = %d\n' % (len(data.dtype.names) - 2))

        # 3 lines per atom: element name, mass and other data
        format = '%s\n%12.4f\n'
        for i, name in enumerate(data.dtype.names[2:]):
            if i > 2: cfg.write('auxiliary[%d] = %s\n' % (i - 3, name))
            format = format + _getfmt(data.dtype.fields[name][0])
        format = format + '\n'

        for i in range(self.n):
            cfg.write(format % tuple(data[i]))

        if opened: cfg.close()

        # Return column names as a list
        return list(data.dtype.names)

    def filter(self, mask):
        "Return smaller Atoms with only the elements where mask is true"

        other = Atoms()

        if mask is None:
            mask = numpy.zeros((self.n, ), numpy.bool)
            mask[:] = True

        other.n = count(mask)
        other.lattice = self.lattice.copy()
        other.g = self.g.copy()
        other.params = self.params.copy()
        other.properties = self.properties.copy()

        other.real = self.real[mask]
        other.int = self.int[mask]
        other.str = self.str[mask]
        other.logical = self.logical[mask]

        other.repoint()

        return other

    def copy(self):
        if self.n == 0:
            return Atoms()
        else:
            return self.filter(mask=None)

    def add(self, newpos, newspecies):

        if type(newpos) == type([]):
            newpos = numpy.array(newpos)

        if len(newpos.shape) == 1:
            n_new = 1
        else:
            n_new = newpos.shape[0]

        oldn = self.n
        self.n = self.n + n_new

        self.real = numpy.resize(self.real, (self.n, self.real.shape[1]))
        self.int = numpy.resize(self.int, (self.n, self.int.shape[1]))
        self.str = numpy.resize(self.str, (self.n, self.str.shape[1]))
        self.logical = numpy.resize(self.logical,
                                    (self.n, self.logical.shape[1]))

        self.repoint()

        self.pos[oldn:self.n] = newpos
        self.species[oldn:self.n] = newspecies

    def remove(self, discard):
        keep = [i for i in range(self.n) if not i in discard]

        self.n = len(keep)
        self.real = self.real[keep]
        self.int = self.int[keep]
        self.str = self.str[keep]
        self.logical = self.logical[keep]
        self.repoint()

    def supercell(self, n1, n2, n3):

        other = Atoms(n=self.n*n1*n2*n3,n_int=self.int.shape[1],\
                      n_real=self.real.shape[1], \
                      properties=self.properties.copy())

        other.lattice[0, :] = self.lattice[0, :] * n1
        other.lattice[1, :] = self.lattice[1, :] * n2
        other.lattice[2, :] = self.lattice[2, :] * n3
        other.g = numpy.linalg.inv(other.lattice)

        for i in range(n1):
            for j in range(n2):
                for k in range(n3):
                    p = numpy.dot(self.lattice, numpy.array([i, j, k]))
                    for n in range(self.n):
                        nn = ((i * n2 + j) * n3 + k) * self.n + n
                        other.int[nn, :] = self.int[n, :]
                        other.real[nn, :] = self.real[n, :]
                        other.logical[nn, :] = self.logical[n, :]
                        other.str[nn, :] = self.str[n, :]
                        other.pos[nn, :] = self.pos[n, :] + p

        other.repoint()
        return other

    def cell_volume(self):
        return abs(
            numpy.dot(numpy.cross(self.lattice[0, :], self.lattice[1, :]),
                      self.lattice[2, :]))
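    # --- Usage sketch (editor's illustration, not part of the original source) ---
    # Assuming the module-level helpers this class relies on are importable:
    #     at = Atoms()
    #     at.alloc(n=2)                      # 2 atoms, default 100 A cubic lattice
    #     at.species[:] = ['Si', 'O']
    #     at.pos[1, :] = [1.6, 0.0, 0.0]
    #     at.add_property('charge', 0.0)     # adds a real column, exposed as at.charge
    #     at.write_xyz('frame.xyz')          # extended XYZ with Lattice= / Properties=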
Exemple #37
0
        build_Tree()
        index += 1
    
    
    if len(Tindices2keep_set) == len(i_iminus1_pool_dict.keys()):
        print "NO TOCSY INDEX GROUPS WERE REMOVED FROM CONNECTIVITY FILE !!!"
        sys.exit(0)
    for Tindex in i_iminus1_pool_dict.keys():
        if Tindex not in Tindices2keep_set:
            print "Deleting TOCSY index group", Tindex, " from connectivities pool file."
            del i_iminus1_pool_dict[Tindex]
            del i_iminus1_complete_dict[Tindex]
            del iaaindex_iminus1aaTypesProbTupleList_dict[Tindex]
            del iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex]
    
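    # When the KEEP_ONLY_GLY / KEEP_ONLY_ALA options are set, collapse the amino-acid
    # type pool of a TOCSY index group to just GLY (resp. ALA) whenever that type
    # is the top prediction and its probability is at least 100x the runner-up.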
    for Tindex in iaaindex_iminus1aaTypesProbPoolTupleList_dict.keys():
        if args.KEEP_ONLY_GLY:
            duplet_list = iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex]
            if len(duplet_list) > 1 and duplet_list[0][0] == 'GLY' and duplet_list[0][1] >= 100 * duplet_list[1][1]:
                print "Setting amino acid type of TOCSY index group ", Tindex, " to ", [duplet_list[0]]
                iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex] = [duplet_list[0]]
        if args.KEEP_ONLY_ALA:
            duplet_list = iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex]
            if len(duplet_list) > 1 and duplet_list[0][0] == 'ALA' and duplet_list[0][1] >= 100 * duplet_list[1][1]:
                print "Setting amino acid type of TOCSY index group ", Tindex, " to ", [duplet_list[0]]
                iaaindex_iminus1aaTypesProbPoolTupleList_dict[Tindex] = [duplet_list[0]]
    
elif args.MAXIMUM_OCCUPANCY_TOLERANCE != None:  # KEEP ALL CONNECTIVITIES (not only those forming long chains)

    Tindex_maxOccupancy_dict = {}
    for triplet_list in i_iminus1_pool_dict.values():
Exemple #38
0
    for qline_list in sorted_query_lineLists_list:
        current_resid = qline_list[0].replace('?-?-', '').replace('N-H', '')
        if current_resid != previous_resid:
            Tindex_CSlist_dict[current_resid] = []
        Tindex_CSlist_dict[current_resid].append((qline_list[1:]))
        previous_resid = current_resid

    return Tindex_CSlist_dict


Tindex_CSlist_dict = read_spectrum_file(args.TOCSY_fname)
residue_CSlist_dict = OrderedDict(
)  # ordereddict with keys the resname+resid -> the list of the associated H-C-N-HN resonances. E.g.
patched_residues_list = [
]  # list with the residues that were added by -patch option in the chain_linker.py (they have no TOCSY peaks but have NOESY)
for residue in residue2Tindex_dict.keys():
    print "DEBUG: residue=", residue
    if residue2Tindex_dict[residue] == None:
        print "DEBUG: residue2Tindex_dict[residue]=", residue2Tindex_dict[
            residue]
        continue
    try:
        residue_CSlist_dict[residue] = Tindex_CSlist_dict[
            residue2Tindex_dict[residue]]
    except KeyError:  # in case this residue from the alignment has no TOCSY peaks (was added using -patch in the chain_linker.py), save it
        patched_residues_list.append(residue)
        continue

print "DEBUG: residue_CSlist_dict =", residue_CSlist_dict
print "DEBUG: patched_residues_list=", patched_residues_list
NOESY_residue_CSlist_dict = OrderedDict()
Exemple #39
0
class HasParameters(object):
    """This class provides an implementation of the IHasParameters interface."""

    _do_not_promote = [
        'get_expr_depends', 'get_referenced_compnames',
        'get_referenced_varpaths', 'get_metadata'
    ]

    def __init__(self, parent):
        self._parameters = OrderedDict()
        self._allowed_types = ['continuous']
        if obj_has_interface(parent, ISolver):
            self._allowed_types.append('unbounded')
        self._parent = None if parent is None else weakref.ref(parent)

    def __getstate__(self):
        state = self.__dict__.copy()
        state['_parent'] = self.parent
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        parent = state['_parent']
        self._parent = None if parent is None else weakref.ref(parent)

    @property
    def parent(self):
        """ The object we are a delegate of. """
        return None if self._parent is None else self._parent()

    def _item_count(self):
        """This is used by the replace function to determine if a delegate from
        the target object is 'empty' or not.  If it's empty, it's not an error
        if the replacing object doesn't have this delegate.
        """
        return len(self._parameters)

    def add_parameter(self,
                      target,
                      low=None,
                      high=None,
                      scaler=None,
                      adder=None,
                      start=None,
                      fd_step=None,
                      name=None,
                      scope=None):
        """Adds a parameter or group of parameters to the driver.

        target: string or iter of strings or Parameter
            What the driver should vary during execution. A *target* is an
            expression that can reside on the left-hand side of an assignment
            statement, so typically it will be the name of a variable or
            possibly a subscript expression indicating an entry within an array
            variable, e.g., x[3]. If an iterator of targets is given, then the
            driver will set all targets given to the same value whenever it
            varies this parameter during execution. If a Parameter instance is
            given, then that instance is copied into the driver with any other
            arguments specified, overriding the values in the given parameter.

        low: float (optional)
            Minimum allowed value of the parameter. If scaler and/or adder
            is supplied, use the transformed value here. If target is an array,
            this may also be an array, but must have the same size.

        high: float (optional)
            Maximum allowed value of the parameter. If scaler and/or adder
            is supplied, use the transformed value here. If target is an array,
            this may also be an array, but must have the same size.

        scaler: float (optional)
            Value to multiply the possibly offset parameter value by. If target
            is an array, this may also be an array, but must have the same size.

        adder: float (optional)
            Value to add to parameter prior to possible scaling. If target is
            an array, this may also be an array, but must have the same size.

        start: any (optional)
            Value to set into the target or targets of a parameter before
            starting any executions. If not given, analysis will start with
            whatever values are in the target or targets at that time. If target
            is an array, this may also be an array, but must have the same size.

        fd_step: float (optional)
            Step-size to use for finite difference calculation. If no value is
            given, the differentiator will use its own default. If target is an
            array, this may also be an array, but must have the same size.

        name: str (optional)
            Name used to refer to the parameter in place of the name of the
            variable referred to in the parameter string.
            This is sometimes useful if, for example, multiple entries in the
            same array variable are declared as parameters.

        scope: object (optional)
            The object to be used as the scope when evaluating the expression.

        If neither "low" nor "high" is specified, the min and max will
        default to the values in the metadata of the variable being
        referenced.
        """

        if isinstance(target, (ParameterBase, ParameterGroup)):
            self._parameters[target.name] = target
            target.override(low, high, scaler, adder, start, fd_step, name)
        else:
            if isinstance(target, basestring):
                names = [target]
                key = target
            else:
                names = target
                key = tuple(target)

            if name is not None:
                key = name

            dups = set(self.list_param_targets()).intersection(names)
            if len(dups) == 1:
                self.parent.raise_exception(
                    "'%s' is already a Parameter"
                    " target" % dups.pop(), ValueError)
            elif len(dups) > 1:
                self.parent.raise_exception(
                    "%s are already Parameter targets" % sorted(list(dups)),
                    ValueError)

            if key in self._parameters:
                self.parent.raise_exception("%s is already a Parameter" % key,
                                            ValueError)
            try:
                _scope = self._get_scope(scope)
                if len(names) == 1:
                    target = self._create(names[0], low, high, scaler, adder,
                                          start, fd_step, key, _scope)
                else:  # defining a ParameterGroup
                    parameters = [
                        self._create(n, low, high, scaler, adder, start,
                                     fd_step, key, _scope) for n in names
                    ]
                    types = set([p.valtypename for p in parameters])
                    if len(types) > 1:
                        raise ValueError("Can't add parameter %s because "
                                         "%s are not all of the same type" %
                                         (key, " and ".join(names)))
                    target = ParameterGroup(parameters)
                self._parameters[key] = target
            except Exception:
                self.parent.reraise_exception(info=sys.exc_info())

        self.parent.config_changed()
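    # Usage sketch for add_parameter() (editor's illustration; 'driver' and the
    # target paths are hypothetical):
    #     driver.add_parameter('comp.x', low=-10.0, high=10.0, start=1.0)
    #     driver.add_parameter(('comp.y[0]', 'comp.y[1]'), low=0.0, high=5.0,
    #                          name='y01')   # two targets varied together as a ParameterGroup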

    def _create(self, target, low, high, scaler, adder, start, fd_step, key,
                scope):
        """ Create one Parameter or ArrayParameter. """
        try:
            expreval = ExprEvaluator(target, scope)
        except Exception as err:
            raise err.__class__("Can't add parameter: %s" % err)
        if not expreval.is_valid_assignee():
            raise ValueError("Can't add parameter: '%s' is not a"
                             " valid parameter expression" % expreval.text)
        try:
            val = expreval.evaluate()
        except Exception as err:
            val = None  # Let Parameter code sort out why.

        name = key[0] if isinstance(key, tuple) else key

        if isinstance(val, ndarray):
            return ArrayParameter(target,
                                  low=low,
                                  high=high,
                                  scaler=scaler,
                                  adder=adder,
                                  start=start,
                                  fd_step=fd_step,
                                  name=name,
                                  scope=scope,
                                  _expreval=expreval,
                                  _val=val,
                                  _allowed_types=self._allowed_types)
        else:
            return Parameter(target,
                             low=low,
                             high=high,
                             scaler=scaler,
                             adder=adder,
                             start=start,
                             fd_step=fd_step,
                             name=name,
                             scope=scope,
                             _expreval=expreval,
                             _val=val,
                             _allowed_types=self._allowed_types)

    def remove_parameter(self, name):
        """Removes the parameter with the given name."""
        param = self._parameters.get(name)
        if param:
            del self._parameters[name]
        else:
            self.parent.raise_exception(
                "Trying to remove parameter '%s' "
                "that is not in this driver." % (name, ), AttributeError)
        self.parent.config_changed()

    def config_parameters(self):
        """Reconfigure parameters from potentially changed targets."""
        for param in self._parameters.values():
            param.configure()

    def get_references(self, name):
        """Return references to component `name` in preparation for subsequent
        :meth:`restore_references` call.

        name: string
            Name of component being removed.
        """
        refs = OrderedDict()
        for pname, param in self._parameters.items():
            if name in param.get_referenced_compnames():
                refs[pname] = param
        return refs

    def remove_references(self, name):
        """Remove references to component `name`.

        name: string
            Name of component being removed.
        """
        to_remove = []
        for pname, param in self._parameters.items():
            if name in param.get_referenced_compnames():
                to_remove.append(pname)

        for pname in to_remove:
            self.remove_parameter(pname)

    def restore_references(self, refs):
        """Restore references to component `name` from `refs`.

        refs: object
            Value returned by :meth:`get_references`.
        """
        for pname, param in refs.items():
            try:
                self.add_parameter(param)
            except Exception as err:
                self.parent._logger.warning(
                    "Couldn't restore parameter '%s': %s" % (pname, str(err)))

    def list_param_targets(self):
        """Returns a list of parameter targets. Note that this
        list may contain more entries than the list of Parameter,
        ParameterGroup, and ArrayParameter objects since ParameterGroup
        instances have multiple targets.
        """
        targets = []
        for param in self._parameters.values():
            targets.extend(param.targets)
        return targets

    def list_param_group_targets(self):
        """Returns a list of tuples that contain the targets for each
        parameter group.
        """
        targets = []
        for param in self.get_parameters().values():
            targets.append(tuple(param.targets))
        return targets

    def clear_parameters(self):
        """Removes all parameters."""
        for name in self._parameters.keys():
            self.remove_parameter(name)
        self._parameters = OrderedDict()

    def get_parameters(self):
        """Returns an ordered dict of parameter objects."""
        return self._parameters

    def total_parameters(self):
        """Returns the total number of values to be set."""
        return sum([param.size for param in self._parameters.values()])

    def init_parameters(self):
        """Sets all parameters to their start value if a
        start value is given
        """
        scope = self._get_scope()
        for param in self._parameters.itervalues():
            if param.start is not None:
                param.set(param.start, scope)

    def set_parameter_by_name(self, name, value, case=None, scope=None):
Exemple #40
0
def main():
    try:
        sys.stderr.write(g_program_name + ", version " + __version__ + ", " +
                         __date__ + "\n")
        if sys.version < '2.6':
            raise Exception(
                'Error: Using python ' + sys.version + '\n' +
                '       Alas, your version of python is too old.\n'
                '       You must upgrade to a newer version of python (2.6 or later).'
            )

        if sys.version < '2.7':
            from ordereddict import OrderedDict
        else:
            from collections import OrderedDict

        if sys.version > '3':
            import io
        else:
            import cStringIO

        # defaults:
        ffname = "TINKER_FORCE_FIELD"
        type_subset = Set([])
        filename_in = ""
        file_in = sys.stdin
        pair_style_name = "lj/cut/coul/long"
        pair_style_link = "http://lammps.sandia.gov/doc/pair_lj.html"
        bond_style_name = "harmonic"
        bond_style_link = "http://lammps.sandia.gov/doc/bond_harmonic.html"
        angle_style_name = "harmonic"
        angle_style_link = "http://lammps.sandia.gov/doc/angle_harmonic.html"
        dihedral_style_name = "fourier"
        dihedral_style_link = "http://lammps.sandia.gov/doc/dihedral_fourier.html"
        improper_style_name = "harmonic"
        improper_style_link = "http://lammps.sandia.gov/doc/improper_harmonic.html"
        #improper_style_name = "cvff"
        #improper_style_link = "http://lammps.sandia.gov/doc/improper_cvff.html"
        special_bonds_command = "special_bonds lj/coul 0.0 0.0 0.5"
        mixing_style = "geometric"
        use_hybrid = False
        contains_united_atoms = False
        zeropad_ffid = 1

        argv = [arg for arg in sys.argv]

        i = 1

        while i < len(argv):

            #sys.stderr.write('argv['+str(i)+'] = \"'+argv[i]+'\"\n')

            if argv[i] == '-atoms':
                if i + 1 >= len(argv):
                    raise Exception(
                        'Error: the \"' + argv[i] +
                        '\" argument should be followed by a quoted string\n'
                        '       which contains a space-delimited list of a subset of atom types\n'
                        '       you want to use from the original force-field.\n'
                        '       Make sure you enclose the entire list in quotes.\n'
                    )
                type_subset = Set(argv[i + 1].strip('\"\'').strip().split())
                del argv[i:i + 2]

            elif argv[i] == '-name':
                if i + 1 >= len(argv):
                    raise Exception(
                        'Error: ' + argv[i] +
                        ' flag should be followed by the name of the force-field\n'
                    )
                ffname = argv[i + 1]
                del argv[i:i + 2]

            elif argv[i] in ('-file', '-in-file'):
                if i + 1 >= len(argv):
                    raise Exception(
                        'Error: ' + argv[i] +
                        ' flag should be followed by the name of a force-field file\n'
                    )
                filename_in = argv[i + 1]
                try:
                    file_in = open(filename_in, 'r')
                except IOError:
                    sys.stderr.write('Error: Unable to open file\n'
                                     '       \"' + filename_in + '\"\n'
                                     '       for reading.\n')
                    sys.exit(1)
                del argv[i:i + 2]

            elif argv[i] == '-dihedral-style':
                if i + 1 >= len(argv):
                    raise Exception(
                        'Error: ' + argv[i] +
                        ' flag should be followed by either \"opls\" or \"fourier\"\n'
                    )
                dihedral_style_name = argv[i + 1]
                if dihedral_style_name == "fourier":
                    dihedral_style_link = "http://lammps.sandia.gov/doc/dihedral_fourier.html"
                if dihedral_style_name == "opls":
                    dihedral_style_link = "http://lammps.sandia.gov/doc/dihedral_opls.html"
                else:
                    raise Exception('Error: ' + argv[i] + ' ' +
                                    dihedral_style_name + ' not supported.\n')
                del argv[i:i + 2]

            elif argv[i] in ('-url', '-in-url'):
                import urllib2
                if i + 1 >= len(argv):
                    raise InputError(
                        'Error: ' + argv[i] +
                        ' flag should be followed by a URL pointing to\n'
                        ' a TINKER file containing force-field information.\n')
                url = argv[i + 1]
                try:
                    request = urllib2.Request(url)
                    file_in = urllib2.urlopen(request)
                except urllib2.URLError:
                    sys.stdout.write("Error: Unable to open link:\n" + url +
                                     "\n")
                    sys.exit(1)
                del argv[i:i + 2]

            elif argv[i] == '-hybrid':
                use_hybrid = True
                del argv[i:i + 1]

            elif (argv[i] == '-zeropad' or argv[i] == '-zero-pad'):
                if (i + 1 >= len(argv)) or (argv[i + 1][:1] == '-'):
                    raise Exception(
                        'Error: ' + argv[i] +
                        ' flag should be followed by a positive integer\n')
                zeropad_ffid = int(argv[i + 1])
                del argv[i:i + 2]

            elif argv[i] in ('-help', '--help', '-?', '--?'):
                sys.stderr.write(doc_msg)
                sys.exit(0)
                del argv[i:i + 1]

            else:
                i += 1

        if len(argv) != 1:
            raise Exception('Error: Unrecognized arguments: ' +
                            ' '.join(argv[1:]) + '\n\n' + doc_msg)

        #sys.stderr.write("Reading parameter file...\n")

        lines = file_in.readlines()

        atom2charge = OrderedDict()  # lookup charge from atom type
        atom2mass = OrderedDict()  # lookup mass from atom type
        atom2vdw_e = OrderedDict()  # lookup Lennard-Jones "epsilon" parameter
        atom2vdw_s = OrderedDict()  # lookup Lennard-Jones "sigma" parameter
        atom2descr = OrderedDict()
        atom2ffid = OrderedDict()  # lookup force-field-ID from atom type
        # force-field-ID is an id number/string used to assign
        # bonds, angles, dihedrals, and impropers.

        bonds_by_type = OrderedDict(
        )  # lookup bond parameters by force-field-ID
        angles_by_type = OrderedDict(
        )  # lookup angle parameters by force-field-ID
        dihedrals_by_type = OrderedDict(
        )  # lookup dihedral parameters by force-field-ID
        impropers_by_type = OrderedDict(
        )  # lookup improper parameters by force-field-ID
        lines_ureybrad = []
        lines_biotype = []
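        # For illustration (hypothetical values): a TINKER line such as
        #     bond  13  14   340.0   1.09
        # ends up stored as bonds_by_type[('13', '14')] = (340.0, 1.09),
        # keyed by the (optionally zero-padded) force-field IDs of the two atoms.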

        for iline in range(0, len(lines)):
            line = lines[iline]
            tokens = SplitQuotedString(line.strip(), comment_char='#')

            if (len(tokens) > 1) and (tokens[0] == 'atom'):
                tokens = map(RemoveOuterQuotes,
                             SplitQuotedString(line.strip(), comment_char=''))
                if (len(tokens) > 6):
                    if ((len(type_subset) == 0) or (tokens[1] in type_subset)):
                        atom2ffid[tokens[1]] = tokens[2]
                        #atom2mass[tokens[1]] = float(tokens[6])
                        # Some atoms in oplsaa.prm have zero mass. Unfortunately this
                        # causes LAMMPS to crash, even if these atoms are never used,
                        # so I give the mass a non-zero value instead.
                        atom2mass[tokens[1]] = max(float(tokens[6]), 1e-30)
                        atom2descr[tokens[1]] = tokens[4]
                        if tokens[4].find('(UA)') != -1:
                            contains_united_atoms = True
                else:
                    raise Exception('Error: Invalid atom line:\n' + line)
            elif (len(tokens) > 2) and (tokens[0] == 'charge'):
                if ((len(type_subset) == 0) or (tokens[1] in type_subset)):
                    atom2charge[tokens[1]] = float(tokens[2])
            elif (len(tokens) > 3) and (tokens[0] == 'vdw'):
                if ((len(type_subset) == 0) or (tokens[1] in type_subset)):
                    atom2vdw_e[tokens[1]] = float(tokens[3])  # "epsilon"
                    atom2vdw_s[tokens[1]] = float(tokens[2])  # "sigma"
            elif (len(tokens) > 4) and (tokens[0] == 'bond'):
                k = float(tokens[3])
                r0 = float(tokens[4])
                bonds_by_type[tokens[1].rjust(zeropad_ffid, '0'),
                              tokens[2].rjust(zeropad_ffid, '0')] = (k, r0)
            elif (len(tokens) > 5) and (tokens[0] == 'angle'):
                k = float(tokens[4])
                angle0 = float(tokens[5])
                angles_by_type[tokens[1].rjust(zeropad_ffid, '0'),
                               tokens[2].rjust(zeropad_ffid, '0'),
                               tokens[3].rjust(zeropad_ffid, '0')] = (k,
                                                                      angle0)
            elif (len(tokens) > 11) and (tokens[0] == 'torsion'):
                if dihedral_style_name == 'fourier':
                    # http://lammps.sandia.gov/doc/dihedral_fourier.html
                    m = (len(tokens) - 5) // 3  # number of Fourier terms
                    K = [0.0 for i in range(0, m)]
                    n = [0.0 for i in range(0, m)]
                    d = [0.0 for i in range(0, m)]
                    for i in range(0, m):
                        K[i] = float(tokens[5 + 3 * i])
                        d[i] = float(tokens[5 + 3 * i + 1])
                        n[i] = float(tokens[5 + 3 * i + 2])
                    dihedrals_by_type[tokens[1].rjust(zeropad_ffid, '0'),
                                      tokens[2].rjust(zeropad_ffid, '0'),
                                      tokens[3].rjust(zeropad_ffid, '0'),
                                      tokens[4].rjust(zeropad_ffid, '0')] = (K,
                                                                             n,
                                                                             d)
                elif dihedral_style_name == 'opls':
                    # http://lammps.sandia.gov/doc/dihedral_opls.html
                    K1 = float(tokens[5])
                    K2 = float(tokens[8])
                    K3 = float(tokens[11])
                    K4 = 0.0
                    if len(tokens) > 14:
                        K4 = float(tokens[14])
                    if ((float(tokens[6]) != 0.0) or (float(tokens[7]) != 1.0)
                            or (float(tokens[9]) not in (180.0, -180.0))
                            or (float(tokens[10]) != 2.0)
                            or (float(tokens[12]) != 0.0)
                            or (float(tokens[13]) != 3.0)
                            or ((K4 != 0.0) and
                                ((len(tokens) <= 16) or
                                 (float(tokens[15]) not in (180.0, -180.0)) or
                                 (float(tokens[16]) != 4.0)))):
                        raise Exception(
                            "Error: This parameter file is incompatible with -dihedral-style \""
                            + dihedral_style_name + "\"\n" +
                            "       (See line number " + str(iline + 1) +
                            " of parameter file.)\n")
                    dihedrals_by_type[tokens[1].rjust(zeropad_ffid, '0'),
                                      tokens[2].rjust(zeropad_ffid, '0'),
                                      tokens[3].rjust(zeropad_ffid, '0'),
                                      tokens[4].rjust(zeropad_ffid, '0')] = (
                                          K1, K2, K3, K4)
                else:
                    assert (False)

            elif (len(tokens) > 7) and (tokens[0] == 'imptors'):
                k = float(tokens[5])
                angle0 = float(tokens[6])
                multiplicity = float(tokens[7])
                impropers_by_type[tokens[1].rjust(zeropad_ffid, '0'),
                                  tokens[2].rjust(zeropad_ffid, '0'),
                                  tokens[3].rjust(zeropad_ffid, '0'),
                                  tokens[4].rjust(zeropad_ffid, '0')] = (
                                      k / multiplicity, angle0)
            elif ((len(tokens) > 0) and (tokens[0] == 'biotype')):
                # I'm not sure what to do with these, so I'll store them for now and
                # append them as comments to the .lt file generated by the program.
                lines_biotype.append(line.rstrip())
            elif ((len(tokens) > 0) and (tokens[0] == 'ureybrad')):
                # I'm not sure what to do with these, so I'll store them for now and
                # append them as comments to the .lt file generated by the program.
                lines_ureybrad.append(line.rstrip())
            elif ((len(tokens) > 1) and (tokens[0] == 'radiusrule')):
                if tokens[1] == 'GEOMETRIC':
                    mixing_style = 'geometric'
                elif tokens[1] == 'ARITHMETIC':
                    mixing_style = 'arithmetic'
                else:
                    raise Exception("Error: Unrecognized mixing style: " +
                                    tokens[1] + ", found here:\n" + line)
            elif ((len(tokens) > 1) and (tokens[0] == 'epsilonrule')):
                if tokens[1] != 'GEOMETRIC':
                    raise Exception(
                        "Error: As of 2016-9-21, LAMMPS only supports GEOMETRIC mixing of energies\n"
                        +
                        "       This force field simply cannot be used with LAMMPS in a general way.\n"
                        +
                        "       One way around this is to manually change the \"epsilonrule\" back to\n"
                        +
                        "       GEOMETRIC, and limit the number of atom types considered by this\n"
                        +
                        "       program by using the -atoms \"LIST OF ATOMS\" argument,\n"
                        +
                        "       to only include the atoms you care about, and then explicitly\n"
                        +
                        "       define pair_coeffs for all possible pairs of these atom types.\n"
                        +
                        "       If this is a popular force-field, then lobby the LAMMPS developers\n"
                        + "       to consider alternate mixing rules.\n\n" +
                        "The offending line from the file is line number " +
                        str(iline) + ":\n" + line + "\n")

        # Zero-pad the atom2ffid values so that they have the same number
        # of digits.  This is usually not necessary, but it can be helpful
        # to remove uncertainty about the meaning of '4*' which could
        # pattern match with '4', '4L', '47', '47L'...  If you replace '4'
        # with '04', '04*' becomes distinguishable from '47*'.
        # This can be useful if you want to augment the force field later,
        # (for example, adding additional atoms to the LOPLSAA variant of OPLSAA)
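        # Illustration (hypothetical IDs): with zeropad_ffid = 2 the loop below
        # turns '4' into '04', so a wildcard pattern such as '04*' can no
        # longer accidentally match '47', '47L', etc.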

        for k in atom2ffid.keys():
            atom2ffid[k] = atom2ffid[k].rjust(zeropad_ffid, '0')

            # Horrible hack:  for LOPLSAA, uncomment the next 3 lines:
            #ki = atom2ffid[k].find('L')
            #if ki!=-1:
            #    atom2ffid[k] = atom2ffid[k].rjust(zeropad_ffid + len(atom2ffid[k]) - ki, '0')

        #sys.stderr.write(" done.\n")
        #sys.stderr.write("Converting to moltemplate format...\n")

        system_is_charged = False
        for atom_type in atom2charge:
            if atom2charge[atom_type] != 0.0:
                system_is_charged = True

        if system_is_charged:
            pair_style_name = "lj/cut/coul/long"
            pair_style_params = "10.0 10.0"
            kspace_style = "    kspace_style pppm 0.0001\n"
            pair_style_link = "http://lammps.sandia.gov/doc/pair_lj.html"
        else:
            pair_style_name = "lj/cut"
            pair_style_params = "10.0"
            kspace_style = ""
            pair_style_link = "http://lammps.sandia.gov/doc/pair_lj.html"

        pair_style_command = "    pair_style " + ("hybrid " if use_hybrid else "") + \
                             pair_style_name + " " + pair_style_params + "\n"

        sys.stdout.write("# This file was generated automatically using:\n")
        sys.stdout.write("# " + g_program_name + " " + " ".join(sys.argv[1:]) +
                         "\n")
        if contains_united_atoms:
            sys.stdout.write(
                "#\n"
                "# WARNING: Many of these atoms are probably UNITED-ATOM (UA) atoms.\n"
                "#          The hydrogen atoms have been absorbed into the heavy atoms, and the\n"
                "#          force-field modified accordingly. Do not mix with ordinary atoms.\n"
            )
        sys.stdout.write(
            "#\n"
            "# WARNING: The following 1-2, 1-3, and 1-4 weighting parameters were ASSUMED:\n"
        )
        sys.stdout.write("#          " + special_bonds_command + "\n")
        sys.stdout.write(
            "#          (See http://lammps.sandia.gov/doc/special_bonds.html for details)\n"
        )
        if len(lines_ureybrad) > 0:
            sys.stdout.write(
                "#\n"
                "# WARNING: All Urey-Bradley interactions have been IGNORED including:\n"
            )
            sys.stdout.write(
                "#             ffid1 ffid2 ffid3    K        r0\n# ")
            sys.stdout.write("\n# ".join(lines_ureybrad))
            sys.stdout.write("\n\n")
        sys.stdout.write("\n\n")
        sys.stdout.write(ffname + " {\n\n")

        sys.stdout.write(
            "  # Below we will use lammps \"set\" command to assign atom charges\n"
            "  # by atom type.  http://lammps.sandia.gov/doc/set.html\n\n")

        sys.stdout.write("  write_once(\"In Charges\") {\n")
        for atype in atom2mass:
            assert (atype in atom2descr)
            sys.stdout.write("    set type @atom:" + atype + " charge " +
                             str(atom2charge[atype]) + "  # \"" +
                             atom2descr[atype] + "\"\n")
        sys.stdout.write("  } #(end of atom partial charges)\n\n\n")

        sys.stdout.write("  write_once(\"Data Masses\") {\n")
        for atype in atom2mass:
            sys.stdout.write("    @atom:" + atype + " " +
                             str(atom2mass[atype]) + "\n")
        sys.stdout.write("  } #(end of atom masses)\n\n\n")

        sys.stdout.write(
            "  # ---------- EQUIVALENCE CATEGORIES for bonded interaction lookup ----------\n"
            "  #   Each type of atom has a separate ID used for looking up bond parameters\n"
            "  #   and a separate ID for looking up 3-body angle interaction parameters\n"
            "  #   and a separate ID for looking up 4-body dihedral interaction parameters\n"
            "  #   and a separate ID for looking up 4-body improper interaction parameters\n"
            #"  #   (This is because there are several different types of sp3 carbon atoms\n"
            #"  #   which have the same torsional properties when within an alkane molecule,\n"
            #"  #   for example.  If they share the same dihedral-ID, then this frees us\n"
            #"  #   from being forced define separate dihedral interaction parameters\n"
            #"  #   for all of them.)\n"
            "  #   The complete @atom type name includes ALL of these ID numbers.  There's\n"
            "  #   no need to force the end-user to type the complete name of each atom.\n"
            "  #   The \"replace\" command used below informs moltemplate that the short\n"
            "  #   @atom names we have been using abovee are equivalent to the complete\n"
            "  #   @atom names used below:\n\n")

        for atype in atom2ffid:
            sys.stdout.write("  replace{ @atom:" + atype + " @atom:" + atype +
                             "_b" + atom2ffid[atype] + "_a" +
                             atom2ffid[atype] + "_d" + atom2ffid[atype] +
                             "_i" + atom2ffid[atype] + " }\n")
        sys.stdout.write("\n\n\n\n")

        sys.stdout.write(
            "  # --------------- Non-Bonded interactions: ---------------------\n"
            "  # " + pair_style_link + "\n"
            "  # Syntax:\n"
            "  # pair_coeff    AtomType1    AtomType2   " +
            ("PairStyleName  " if use_hybrid else "") + "parameters...\n\n")

        sys.stdout.write("  write_once(\"In Settings\") {\n")
        for atype in atom2vdw_e:
            assert (atype in atom2vdw_s)
            if not (atype in atom2ffid):
                continue

            sys.stdout.write("    pair_coeff " + "@atom:" + atype + "_b" +
                             atom2ffid[atype] + "_a" + atom2ffid[atype] +
                             "_d" + atom2ffid[atype] + "_i" +
                             atom2ffid[atype] + " "
                             "@atom:" + atype + "_b" + atom2ffid[atype] +
                             "_a" + atom2ffid[atype] + "_d" +
                             atom2ffid[atype] + "_i" + atom2ffid[atype] + " " +
                             (pair_style_name if use_hybrid else "") + " " +
                             str(atom2vdw_e[atype]) + " " +
                             str(atom2vdw_s[atype]) + "\n")
        sys.stdout.write("  } #(end of pair_coeffs)\n\n\n\n")

        sys.stdout.write("  # ------- Bonded Interactions: -------\n"
                         "  # " + bond_style_link + "\n"
                         "  # Syntax:  \n"
                         "  # bond_coeff BondTypeName  " +
                         ("BondStyleName  " if use_hybrid else "") +
                         "parameters...\n\n")

        sys.stdout.write("  write_once(\"In Settings\") {\n")
        for btype in bonds_by_type:
            ffid1 = btype[0] if btype[0] != ("0" * zeropad_ffid) else "X"
            ffid2 = btype[1] if btype[1] != ("0" * zeropad_ffid) else "X"
            (k, r0) = bonds_by_type[btype]
            sys.stdout.write("    bond_coeff @bond:" + ffid1 + "_" + ffid2 +
                             " " + (bond_style_name if use_hybrid else "") +
                             " " + str(k) + " " + str(r0) + "\n")
        sys.stdout.write("  } #(end of bond_coeffs)\n\n")

        sys.stdout.write("  # Rules for assigning bond types by atom type:\n"
                         "  # BondTypeName       AtomType1         AtomType2\n"
                         "  #   (* = wildcard)\n\n")

        sys.stdout.write("  write_once(\"Data Bonds By Type\") {\n")
        for btype in bonds_by_type:
            ffid1 = btype[0] if btype[0] != ("0" * zeropad_ffid) else "X"
            ffid2 = btype[1] if btype[1] != ("0" * zeropad_ffid) else "X"
            sys.stdout.write("    @bond:" + ffid1 + "_" + ffid2)
            ffid1 = "@atom:*_b" + btype[0] + \
                "*_a*_d*_i*" if btype[0] != ("0"*zeropad_ffid) else "@atom:*"
            ffid2 = "@atom:*_b" + btype[1] + \
                "*_a*_d*_i*" if btype[1] != ("0"*zeropad_ffid) else "@atom:*"
            sys.stdout.write(" " + ffid1 + " " + ffid2 + "\n")
        sys.stdout.write("  } #(end of bonds by type)\n\n\n\n\n")

        sys.stdout.write("  # ------- Angle Interactions: -------\n"
                         "  # " + angle_style_link + "\n"
                         "  # Syntax:  \n"
                         "  # angle_coeff AngleTypeName  " +
                         ("AngleStyleName  " if use_hybrid else "") +
                         "parameters...\n\n")

        sys.stdout.write("  write_once(\"In Settings\") {\n")
        for atype in angles_by_type:
            ffid1 = atype[0] if atype[0] != ("0" * zeropad_ffid) else "X"
            ffid2 = atype[1] if atype[1] != ("0" * zeropad_ffid) else "X"
            ffid3 = atype[2] if atype[2] != ("0" * zeropad_ffid) else "X"
            (k, angle0) = angles_by_type[atype]
            sys.stdout.write("    angle_coeff @angle:" + ffid1 + "_" + ffid2 +
                             "_" + ffid3 + " " +
                             (angle_style_name if use_hybrid else "") + " " +
                             str(k) + " " + str(angle0) + "\n")
        sys.stdout.write("  } #(end of angle_coeffs)\n\n")

        sys.stdout.write(
            "  # Rules for creating angle interactions according to atom type:\n"
            "  #   AngleTypeName     AtomType1       AtomType2          AtomType3\n"
            "  #   (* = wildcard)\n\n")

        sys.stdout.write("  write_once(\"Data Angles By Type\") {\n")
        for atype in angles_by_type:
            ffid1 = atype[0] if atype[0] != ("0" * zeropad_ffid) else "X"
            ffid2 = atype[1] if atype[1] != ("0" * zeropad_ffid) else "X"
            ffid3 = atype[2] if atype[2] != ("0" * zeropad_ffid) else "X"
            sys.stdout.write("    @angle:" + ffid1 + "_" + ffid2 + "_" + ffid3)
            ffid1 = "@atom:*_b*_a" + atype[0] + \
                "*_d*_i*" if atype[0] != ("0"*zeropad_ffid) else "@atom:*"
            ffid2 = "@atom:*_b*_a" + atype[1] + \
                "*_d*_i*" if atype[1] != ("0"*zeropad_ffid) else "@atom:*"
            ffid3 = "@atom:*_b*_a" + atype[2] + \
                "*_d*_i*" if atype[2] != ("0"*zeropad_ffid) else "@atom:*"
            sys.stdout.write(" " + ffid1 + " " + ffid2 + " " + ffid3 + "\n")
        sys.stdout.write("  } #(end of angles by type)\n\n\n\n\n")

        sys.stdout.write(
            "  # ----------- Dihedral Interactions: ------------\n"
            "  # " + dihedral_style_link + "\n"
            "  # Syntax:\n"
            "  # dihedral_coeff DihedralTypeName  " +
            ("DihedralStyleName  " if use_hybrid else "") +
            "parameters...\n\n")

        sys.stdout.write("  write_once(\"In Settings\") {\n")
        for dtype in dihedrals_by_type:
            ffid1 = dtype[0] if dtype[0] != ("0" * zeropad_ffid) else "X"
            ffid2 = dtype[1] if dtype[1] != ("0" * zeropad_ffid) else "X"
            ffid3 = dtype[2] if dtype[2] != ("0" * zeropad_ffid) else "X"
            ffid4 = dtype[3] if dtype[3] != ("0" * zeropad_ffid) else "X"
            sys.stdout.write("    dihedral_coeff @dihedral:" + ffid1 + "_" +
                             ffid2 + "_" + ffid3 + "_" + ffid4 + " " +
                             (dihedral_style_name if use_hybrid else "") + " ")
            if dihedral_style_name == 'fourier':
                # http://lammps.sandia.gov/doc/dihedral_fourier.html
                (K, n, d) = dihedrals_by_type[dtype]
                m = len(K)
                assert ((m == len(n)) and (m == len(d)))
                sys.stdout.write(str(m))
                for i in range(0, m):
                    sys.stdout.write(" " + str(K[i]) + " " + str(n[i]) + " " +
                                     str(d[i]))
                sys.stdout.write("\n")
            elif dihedral_style_name == 'opls':
                # http://lammps.sandia.gov/doc/dihedral_opls.html
                (K1, K2, K3, K4) = dihedrals_by_type[dtype]
                sys.stdout.write(
                    str(K1) + " " + str(K2) + " " + str(K3) + " " + str(K4) +
                    "\n")
            else:
                assert (False)
        sys.stdout.write("  } #(end of dihedral_coeffs)\n\n")

        sys.stdout.write(
            "  # Rules for creating dihedral interactions according to atom type:\n"
            "  #   DihedralTypeName     AtomType1     AtomType2     AtomType3     AtomType4\n"
            "  #   (* = wildcard)\n\n")

        sys.stdout.write("  write_once(\"Data Dihedrals By Type\") {\n")
        for dtype in dihedrals_by_type:
            ffid1 = dtype[0] if dtype[0] != ("0" * zeropad_ffid) else "X"
            ffid2 = dtype[1] if dtype[1] != ("0" * zeropad_ffid) else "X"
            ffid3 = dtype[2] if dtype[2] != ("0" * zeropad_ffid) else "X"
            ffid4 = dtype[3] if dtype[3] != ("0" * zeropad_ffid) else "X"
            sys.stdout.write("    @dihedral:" + ffid1 + "_" + ffid2 + "_" +
                             ffid3 + "_" + ffid4)
            ffid1 = "@atom:*_b*_a*_d" + dtype[0] + \
                "*_i*" if dtype[0] != ("0"*zeropad_ffid) else "@atom:*"
            ffid2 = "@atom:*_b*_a*_d" + dtype[1] + \
                "*_i*" if dtype[1] != ("0"*zeropad_ffid) else "@atom:*"
            ffid3 = "@atom:*_b*_a*_d" + dtype[2] + \
                "*_i*" if dtype[2] != ("0"*zeropad_ffid) else "@atom:*"
            ffid4 = "@atom:*_b*_a*_d" + dtype[3] + \
                "*_i*" if dtype[3] != ("0"*zeropad_ffid) else "@atom:*"

            sys.stdout.write(" " + ffid1 + " " + ffid2 + " " + ffid3 + " " +
                             ffid4 + "\n")
        sys.stdout.write("  } #(end of dihedrals by type)\n\n\n\n\n")

        sys.stdout.write("  # ---------- Improper Interactions: ----------\n"
                         "  # " + improper_style_link + "\n"
                         "  # Syntax:\n"
                         "  # improper_coeff ImproperTypeName  " +
                         ("ImproperStyleName  " if use_hybrid else "") +
                         "parameters\n\n")

        sys.stdout.write("  write_once(\"In Settings\") {\n")
        for itype in impropers_by_type:
            ffid1 = itype[0] if itype[0] != ("0" * zeropad_ffid) else "X"
            ffid2 = itype[1] if itype[1] != ("0" * zeropad_ffid) else "X"
            ffid3 = itype[2] if itype[2] != ("0" * zeropad_ffid) else "X"
            ffid4 = itype[3] if itype[3] != ("0" * zeropad_ffid) else "X"
            (k, angle0) = impropers_by_type[itype]
            sys.stdout.write("    improper_coeff @improper:" + ffid1 + "_" +
                             ffid2 + "_" + ffid3 + "_" + ffid4 + " " +
                             (improper_style_name if use_hybrid else "") +
                             " " + str(k) + " " + str(angle0) + "\n")
        sys.stdout.write("  } #(end of improper_coeffs)\n\n")

        sys.stdout.write(
            "  # Rules for creating improper interactions according to atom type:\n"
            "  #   ImproperTypeName   AtomType1    AtomType2       AtomType3       AtomType4\n"
            "  #   (* = wildcard)\n")

        sys.stdout.write(
            "  write_once(\"Data Impropers By Type (opls_imp.py)\") {\n")
        for itype in impropers_by_type:
            ffid1 = itype[0] if itype[0] != ("0" * zeropad_ffid) else "X"
            ffid2 = itype[1] if itype[1] != ("0" * zeropad_ffid) else "X"
            ffid3 = itype[2] if itype[2] != ("0" * zeropad_ffid) else "X"
            ffid4 = itype[3] if itype[3] != ("0" * zeropad_ffid) else "X"
            sys.stdout.write("    @improper:" + ffid1 + "_" + ffid2 + "_" +
                             ffid3 + "_" + ffid4)
            ffid1 = "@atom:*_b*_a*_d*_i" + itype[0] + "*" if itype[0] != (
                "0" * zeropad_ffid) else "@atom:*"
            ffid2 = "@atom:*_b*_a*_d*_i" + itype[1] + "*" if itype[1] != (
                "0" * zeropad_ffid) else "@atom:*"
            ffid3 = "@atom:*_b*_a*_d*_i" + itype[2] + "*" if itype[2] != (
                "0" * zeropad_ffid) else "@atom:*"
            ffid4 = "@atom:*_b*_a*_d*_i" + itype[3] + "*" if itype[3] != (
                "0" * zeropad_ffid) else "@atom:*"
            sys.stdout.write(" " + ffid1 + " " + ffid2 + " " + ffid3 + " " +
                             ffid4 + "\n")
        sys.stdout.write("  } #(end of impropers by type)\n\n\n\n\n")

        sys.stdout.write("  # --------   (descriptive comment)   --------\n")
        sys.stdout.write(
            "  # ---- biologically relevant atom types: ----\n  # ")
        sys.stdout.write("\n  # ".join(lines_biotype))
        sys.stdout.write("\n  # ----------   (end of comment)   ----------\n")
        sys.stdout.write("\n\n\n\n")

        sys.stdout.write(
            "  # LAMMPS supports many different kinds of bonded and non-bonded\n"
            "  # interactions which can be selected at run time.  Eventually\n"
            "  # we must inform LAMMPS which of them we will need.  We specify\n"
            "  # this in the \"In Init\" section: \n\n")

        sys.stdout.write("  write_once(\"In Init\") {\n")
        sys.stdout.write("    units real\n")
        sys.stdout.write("    atom_style full\n")
        sys.stdout.write("    bond_style " +
                         ("hybrid " if use_hybrid else "") + bond_style_name +
                         "\n")
        sys.stdout.write("    angle_style " +
                         ("hybrid " if use_hybrid else "") + angle_style_name +
                         "\n")
        sys.stdout.write("    dihedral_style " +
                         ("hybrid " if use_hybrid else "") +
                         dihedral_style_name + "\n")
        sys.stdout.write("    improper_style " +
                         ("hybrid " if use_hybrid else "") +
                         improper_style_name + "\n")
        sys.stdout.write(pair_style_command)
        sys.stdout.write("    pair_modify mix " + mixing_style + "\n")
        sys.stdout.write("    " + special_bonds_command + "\n")
        sys.stdout.write(kspace_style)
        sys.stdout.write("  } #end of init parameters\n\n")

        sys.stdout.write(
            "  # Note: We use \"hybrid\" styles in case the user later wishes to\n"
            "  #       combine the molecules built using this force-field with other\n"
            "  #       molecules that use other styles.  (This is not necessarily\n"
            "  #       a good idea, but LAMMPS and moltemplate both allow it.)\n"
            "  #       For more information:\n"
            "  #       http://lammps.sandia.gov/doc/pair_hybrid.html\n"
            "  #       http://lammps.sandia.gov/doc/bond_hybrid.html\n"
            "  #       http://lammps.sandia.gov/doc/angle_hybrid.html\n"
            "  #       http://lammps.sandia.gov/doc/dihedral_hybrid.html\n"
            "  #       http://lammps.sandia.gov/doc/improper_hybrid.html\n\n\n"
        )

        sys.stdout.write("}  # " + ffname + "\n\n")

        #sys.stderr.write(" done.\n")

        if filename_in != "":
            file_in.close()

    except Exception as err:
        sys.stderr.write('\n\n' + str(err) + '\n')
        sys.exit(1)
Exemple #41
0
class Api(object):
    """
    The main entry point for the application.
    You need to initialize it with a Flask Application: ::

    >>> app = Flask(__name__)
    >>> api = restful.Api(app)

    Alternatively, you can use :meth:`init_app` to set the Flask application
    after it has been constructed.

    :param app: the Flask application object
    :type app: flask.Flask
    :type app: flask.Blueprint
    :param prefix: Prefix all routes with a value, eg v1 or 2010-04-01
    :type prefix: str
    :param default_mediatype: The default media type to return
    :type default_mediatype: str
    :param decorators: Decorators to attach to every resource
    :type decorators: list
    :param catch_all_404s: Use :meth:`handle_error`
        to handle 404 errors throughout your app
    :type catch_all_404s: bool
    :param serve_challenge_on_401: Whether to serve a challenge response to
        clients on receiving 401. This usually leads to a username/password
        popup in web browsers.
    :type serve_challenge_on_401: bool
    :param url_part_order: A string that controls the order that the pieces
        of the url are concatenated when the full url is constructed.  'b'
        is the blueprint (or blueprint registration) prefix, 'a' is the api
        prefix, and 'e' is the path component the endpoint is added with
    :type url_part_order: str
    :param errors: A dictionary to define a custom response for each
        exception or error raised during a request
    :type errors: dict

    """

    def __init__(self, app=None, prefix='',
                 default_mediatype='application/json', decorators=None,
                 catch_all_404s=False, serve_challenge_on_401=False,
                 url_part_order='bae', errors=None):
        self.representations = OrderedDict(DEFAULT_REPRESENTATIONS)
        self.urls = {}
        self.prefix = prefix
        self.default_mediatype = default_mediatype
        self.decorators = decorators if decorators else []
        self.catch_all_404s = catch_all_404s
        self.serve_challenge_on_401 = serve_challenge_on_401
        self.url_part_order = url_part_order
        self.errors = errors or {}
        self.blueprint_setup = None
        self.endpoints = set()
        self.resources = []
        self.app = None
        self.blueprint = None

        if app is not None:
            self.app = app
            self.init_app(app)

    def init_app(self, app):
        """Initialize this class with the given :class:`flask.Flask`
        application or :class:`flask.Blueprint` object.

        :param app: the Flask application or blueprint object
        :type app: flask.Flask
        :type app: flask.Blueprint

        Examples::

            api = Api()
            api.add_resource(...)
            api.init_app(app)

        """
        # If app is a blueprint, defer the initialization
        try:
            app.record(self._deferred_blueprint_init)
        # Flask's Blueprint has a 'record' attribute; a plain Flask app does not
        except AttributeError:
            self._init_app(app)
        else:
            self.blueprint = app

    def _complete_url(self, url_part, registration_prefix):
        """This method is used to defer the construction of the final url in
        the case that the Api is created with a Blueprint.

        :param url_part: The part of the url the endpoint is registered with
        :param registration_prefix: The part of the url contributed by the
            blueprint.  Generally speaking, BlueprintSetupState.url_prefix
        """
        parts = {
            'b': registration_prefix,
            'a': self.prefix,
            'e': url_part
        }
        return ''.join(parts[key] for key in self.url_part_order if parts[key])
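        # A hedged illustration: with url_part_order='bae', self.prefix='/api'
        # and a blueprint registered under '/v1',
        # _complete_url('/users', '/v1') would return '/v1/api/users'.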

    @staticmethod
    def _blueprint_setup_add_url_rule_patch(blueprint_setup, rule, endpoint=None, view_func=None, **options):
        """Method used to patch BlueprintSetupState.add_url_rule for setup
        state instance corresponding to this Api instance.  Exists primarily
        to enable _complete_url's function.

        :param blueprint_setup: The BlueprintSetupState instance (self)
        :param rule: A string or callable that takes a string and returns a
            string(_complete_url) that is the url rule for the endpoint
            being registered
        :param endpoint: See BlueprintSetupState.add_url_rule
        :param view_func: See BlueprintSetupState.add_url_rule
        :param **options: See BlueprintSetupState.add_url_rule
        """

        if callable(rule):
            rule = rule(blueprint_setup.url_prefix)
        elif blueprint_setup.url_prefix:
            rule = blueprint_setup.url_prefix + rule
        options.setdefault('subdomain', blueprint_setup.subdomain)
        if endpoint is None:
            endpoint = _endpoint_from_view_func(view_func)
        defaults = blueprint_setup.url_defaults
        if 'defaults' in options:
            defaults = dict(defaults, **options.pop('defaults'))
        blueprint_setup.app.add_url_rule(rule, '%s.%s' % (blueprint_setup.blueprint.name, endpoint),
                                         view_func, defaults=defaults, **options)

    def _deferred_blueprint_init(self, setup_state):
        """Synchronize prefix between blueprint/api and registration options, then
        perform initialization with setup_state.app :class:`flask.Flask` object.
        When a :class:`flask_restbolt.Api` object is initialized with a blueprint,
        this method is recorded on the blueprint to be run when the blueprint is later
        registered to a :class:`flask.Flask` object.  This method also monkeypatches
        BlueprintSetupState.add_url_rule with _blueprint_setup_add_url_rule_patch.

        :param setup_state: The setup state object passed to deferred functions
            during blueprint registration
        :type setup_state: flask.blueprints.BlueprintSetupState

        """

        self.blueprint_setup = setup_state
        if setup_state.add_url_rule.__name__ != '_blueprint_setup_add_url_rule_patch':
            setup_state._original_add_url_rule = setup_state.add_url_rule
            setup_state.add_url_rule = MethodType(Api._blueprint_setup_add_url_rule_patch,
                                                  setup_state)
        if not setup_state.first_registration:
            raise ValueError('flask-restful blueprints can only be registered once.')
        self._init_app(setup_state.app)

    def _init_app(self, app):
        """Perform initialization actions with the given :class:`flask.Flask`
        object.

        :param app: The flask application object
        :type app: flask.Flask
        """
        app.handle_exception = partial(self.error_router, app.handle_exception)
        app.handle_user_exception = partial(self.error_router, app.handle_user_exception)

        if len(self.resources) > 0:
            for resource, urls, kwargs in self.resources:
                self._register_view(app, resource, *urls, **kwargs)

    def owns_endpoint(self, endpoint):
        """Tests if an endpoint name (not path) belongs to this Api.  Takes
        in to account the Blueprint name part of the endpoint name.

        :param endpoint: The name of the endpoint being checked
        :return: bool
        """

        if self.blueprint:
            if endpoint.startswith(self.blueprint.name):
                endpoint = endpoint.split(self.blueprint.name + '.', 1)[-1]
            else:
                return False
        return endpoint in self.endpoints

    def _should_use_fr_error_handler(self):
        """ Determine if error should be handled with FR or default Flask

        The goal is to return Flask error handlers for non-FR-related routes,
        and FR errors (with the correct media type) for FR endpoints. This
        method currently handles 404 and 405 errors.

        :return: bool
        """
        adapter = current_app.create_url_adapter(request)

        try:
            adapter.match()
        except MethodNotAllowed as e:
            # Check if the other HTTP methods at this url would hit the Api
            valid_route_method = e.valid_methods[0]
            rule, _ = adapter.match(method=valid_route_method, return_rule=True)
            return self.owns_endpoint(rule.endpoint)
        except NotFound:
            return self.catch_all_404s
        except:
            # Werkzeug throws other kinds of exceptions, such as Redirect
            pass

    def _has_fr_route(self):
        """Encapsulating the rules for whether the request was to a Flask endpoint"""
        # 404's, 405's, which might not have a url_rule
        if self._should_use_fr_error_handler():
            return True
        # for all other errors, just check if FR dispatched the route
        if not request.url_rule:
            return False
        return self.owns_endpoint(request.url_rule.endpoint)

    def error_router(self, original_handler, e):
        """This function decides whether the error occured in a flask-restful
        endpoint or not. If it happened in a flask-restful endpoint, our
        handler will be dispatched. If it happened in an unrelated view, the
        app's original error handler will be dispatched.
        In the event that the error occurred in a flask-restful endpoint but
        the local handler can't resolve the situation, the router will fall
        back onto the original_handler as last resort.

        :param original_handler: the original Flask error handler for the app
        :type original_handler: function
        :param e: the exception raised while handling the request
        :type e: Exception

        """
        if self._has_fr_route():
            try:
                return self.handle_error(e)
            except Exception:
                pass  # Fall through to original handler
        return original_handler(e)

    def handle_error(self, e):
        """Error handler for the API transforms a raised exception into a Flask
        response, with the appropriate HTTP status code and body.

        :param e: the raised Exception object
        :type e: Exception

        """
        got_request_exception.send(current_app._get_current_object(), exception=e)

        if not isinstance(e, HTTPException) and current_app.propagate_exceptions:
            exc_type, exc_value, tb = sys.exc_info()
            if exc_value is e:
                raise
            else:
                raise e

        headers = Headers()
        if isinstance(e, HTTPException):
            code = e.code
            default_data = {
                'message': getattr(e, 'description', http_status_message(code))
            }
            headers = e.get_response().headers
        else:
            code = 500
            default_data = {
                'message': http_status_message(code),
            }

        # Werkzeug exceptions generate a content-length header which is added
        # to the response in addition to the actual content-length header
        # https://github.com/flask-restful/flask-restful/issues/534
        remove_headers = ('Content-Length',)

        for header in remove_headers:
            headers.pop(header, None)

        data = getattr(e, 'data', default_data)

        if code >= 500:
            exc_info = sys.exc_info()
            if exc_info[1] is None:
                exc_info = None
            current_app.log_exception(exc_info)

        help_on_404 = current_app.config.get("ERROR_404_HELP", True)
        if code == 404 and help_on_404:
            rules = dict([(re.sub('(<.*>)', '', rule.rule), rule.rule)
                          for rule in current_app.url_map.iter_rules()])
            close_matches = difflib.get_close_matches(request.path, rules.keys())
            if close_matches:
                # If we already have a message, add punctuation and continue it.
                if "message" in data:
                    data["message"] = data["message"].rstrip('.') + '. '
                else:
                    data["message"] = ""

                data['message'] += 'You have requested this URI [' + request.path + \
                                   '] but did you mean ' + \
                                   ' or '.join((
                                       rules[match] for match in close_matches)
                                   ) + ' ?'
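                # e.g. a request for '/userz' when '/users' is registered would
                # produce: "You have requested this URI [/userz] but did you
                # mean /users ?"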

        error_cls_name = type(e).__name__
        if error_cls_name in self.errors:
            custom_data = self.errors.get(error_cls_name, {})
            code = custom_data.get('status', 500)
            data.update(custom_data)

        if code == 406 and self.default_mediatype is None:
            # if we are handling NotAcceptable (406), make sure that
            # make_response uses a representation we support as the
            # default mediatype (so that make_response doesn't throw
            # another NotAcceptable error).
            supported_mediatypes = list(self.representations.keys())
            fallback_mediatype = supported_mediatypes[0] if supported_mediatypes else "text/plain"
            resp = self.make_response(
                data,
                code,
                headers,
                fallback_mediatype = fallback_mediatype
            )
        else:
            resp = self.make_response(data, code, headers)

        if code == 401:
            resp = self.unauthorized(resp)
        return resp

    def mediatypes_method(self):
        """Return a method that returns a list of mediatypes
        """
        return lambda resource_cls: self.mediatypes() + [self.default_mediatype]

    def add_resource(self, resource, *urls, **kwargs):
        """Adds a resource to the api.

        :param resource: the class name of your resource
        :type resource: :class:`Resource`

        :param urls: one or more url routes to match for the resource, standard
                     flask routing rules apply.  Any url variables will be
                     passed to the resource method as args.
        :type urls: str

        :param endpoint: endpoint name (defaults to :meth:`Resource.__name__.lower`).
            Can be used to reference this route in :class:`fields.Url` fields
        :type endpoint: str

        :param resource_class_args: args to be forwarded to the constructor of
            the resource.
        :type resource_class_args: tuple

        :param resource_class_kwargs: kwargs to be forwarded to the constructor
            of the resource.
        :type resource_class_kwargs: dict

        Additional keyword arguments not specified above will be passed as-is
        to :meth:`flask.Flask.add_url_rule`.

        Examples::

            api.add_resource(HelloWorld, '/', '/hello')
            api.add_resource(Foo, '/foo', endpoint="foo")
            api.add_resource(FooSpecial, '/special/foo', endpoint="foo")

        """
        if self.app is not None:
            self._register_view(self.app, resource, *urls, **kwargs)
        else:
            self.resources.append((resource, urls, kwargs))

    def resource(self, *urls, **kwargs):
        """Wraps a :class:`~flask_restbolt.Resource` class, adding it to the
        api. Parameters are the same as :meth:`~flask_restbolt.Api.add_resource`.

        Example::

            app = Flask(__name__)
            api = restful.Api(app)

            @api.resource('/foo')
            class Foo(Resource):
                def get(self):
                    return 'Hello, World!'

        """
        def decorator(cls):
            self.add_resource(cls, *urls, **kwargs)
            return cls
        return decorator

    def _register_view(self, app, resource, *urls, **kwargs):
        endpoint = kwargs.pop('endpoint', None) or resource.__name__.lower()
        self.endpoints.add(endpoint)
        resource_class_args = kwargs.pop('resource_class_args', ())
        resource_class_kwargs = kwargs.pop('resource_class_kwargs', {})

        # NOTE: 'view_functions' is cleaned up from Blueprint class in Flask 1.0
        if endpoint in getattr(app, 'view_functions', {}):
            previous_view_class = app.view_functions[endpoint].__dict__['view_class']

            # if you override the endpoint with a different class, avoid the collision by raising an exception
            if previous_view_class != resource:
                raise ValueError('This endpoint (%s) is already set to the class %s.' % (endpoint, previous_view_class.__name__))

        resource.mediatypes = self.mediatypes_method()  # Hacky
        resource.endpoint = endpoint
        resource_func = self.output(resource.as_view(endpoint, *resource_class_args,
            **resource_class_kwargs))

        for decorator in self.decorators:
            resource_func = decorator(resource_func)

        for url in urls:
            # If this Api has a blueprint
            if self.blueprint:
                # And this Api has been setup
                if self.blueprint_setup:
                    # Set the rule to a string directly, as the blueprint is already
                    # set up.
                    self.blueprint_setup.add_url_rule(url, view_func=resource_func, **kwargs)
                    continue
                else:
                    # Set the rule to a function that expects the blueprint prefix
                    # to construct the final url.  Allows deferment of url finalization
                    # in the case that the associated Blueprint has not yet been
                    # registered to an application, so we can wait for the registration
                    # prefix
                    rule = partial(self._complete_url, url)
            else:
                # If we've got no Blueprint, just build a url with no prefix
                rule = self._complete_url(url, '')
            # Add the url to the application or blueprint
            app.add_url_rule(rule, view_func=resource_func, **kwargs)

    def output(self, resource):
        """Wraps a resource (as a flask view function), for cases where the
        resource does not directly return a response object

        :param resource: The resource as a flask view function
        """
        @wraps(resource)
        def wrapper(*args, **kwargs):
            resp = resource(*args, **kwargs)
            if isinstance(resp, ResponseBase):  # There may be a better way to test
                return resp
            data, code, headers = unpack(resp)
            return self.make_response(data, code, headers=headers)
        return wrapper

    def url_for(self, resource, **values):
        """Generates a URL to the given resource.

        Works like :func:`flask.url_for`."""
        endpoint = resource.endpoint
        if self.blueprint:
            endpoint = '{0}.{1}'.format(self.blueprint.name, endpoint)
        return url_for(endpoint, **values)

    def make_response(self, data, *args, **kwargs):
        """Looks up the representation transformer for the requested media
        type, invoking the transformer to create a response object. This
        defaults to default_mediatype if no transformer is found for the
        requested mediatype. If default_mediatype is None, a 406 Not
        Acceptable response will be sent as per RFC 2616 section 14.1

        :param data: Python object containing response data to be transformed
        """
        default_mediatype = kwargs.pop('fallback_mediatype', None) or self.default_mediatype
        mediatype = request.accept_mimetypes.best_match(
            self.representations,
            default=default_mediatype,
        )
        if mediatype is None:
            raise NotAcceptable()
        if mediatype in self.representations:
            resp = self.representations[mediatype](data, *args, **kwargs)
            resp.headers['Content-Type'] = mediatype
            return resp
        elif mediatype == 'text/plain':
            resp = original_flask_make_response(str(data), *args, **kwargs)
            resp.headers['Content-Type'] = 'text/plain'
            return resp
        else:
            raise InternalServerError()

    def mediatypes(self):
        """Returns a list of requested mediatypes sent in the Accept header"""
        return [h for h, q in sorted(request.accept_mimetypes,
                                     key=operator.itemgetter(1), reverse=True)]
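        # For example, a request sent with
        #     Accept: application/json;q=0.9, text/html
        # would yield ['text/html', 'application/json'] (highest quality first).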

    def representation(self, mediatype):
        """Allows additional representation transformers to be declared for the
        api. Transformers are functions that must be decorated with this
        method, passing the mediatype the transformer represents. Three
        arguments are passed to the transformer:

        * The data to be represented in the response body
        * The http status code
        * A dictionary of headers

        The transformer should convert the data appropriately for the mediatype
        and return a Flask response object.

        Ex::

            @api.representation('application/xml')
            def xml(data, code, headers):
                resp = make_response(convert_data_to_xml(data), code)
                resp.headers.extend(headers)
                return resp
        """
        def wrapper(func):
            self.representations[mediatype] = func
            return func
        return wrapper

    def unauthorized(self, response):
        """ Given a response, change it to ask for credentials """

        if self.serve_challenge_on_401:
            realm = current_app.config.get("HTTP_BASIC_AUTH_REALM", "flask-restful")
            challenge = u"{0} realm=\"{1}\"".format("Basic", realm)

            response.headers['WWW-Authenticate'] = challenge
        return response
Exemple #42
0
def group_all_export(request, group_slug):
    """
    Export all group members for a specific group
    """
    group = get_object_or_404(Group, slug=group_slug)

    # if they can edit it, they can export it
    if not has_perm(request.user, 'user_groups.change_group', group):
        raise Http403

    import xlwt
    from ordereddict import OrderedDict
    from django.db import connection
    from tendenci.apps.forms_builder.forms.models import FieldEntry

    # create the excel book and sheet
    book = xlwt.Workbook(encoding='utf8')
    sheet = book.add_sheet('Group Members and Subscribers')

    #initialize indexes
    row_index = {}
    col_index = {}

    #---------
    # MEMBERS
    #---------
    # excel date styles
    default_style = xlwt.Style.default_style
    datetime_style = xlwt.easyxf(num_format_str='mm/dd/yyyy hh:mm')
    date_style = xlwt.easyxf(num_format_str='mm/dd/yyyy')

    # the key is what the column will be in the
    # excel sheet. the value is the database lookup
    # Used OrderedDict to maintain the column order
    group_mappings = OrderedDict([
        ('user_id', 'au.id'),
        ('first_name', 'au.first_name'),
        ('last_name', 'au.last_name'),
        ('email', 'au.email'),
        ('receives email', 'pp.direct_mail'),
        ('company', 'pp.company'),
        ('address', 'pp.address'),
        ('address2', 'pp.address2'),
        ('city', 'pp.city'),
        ('state', 'pp.state'),
        ('zipcode', 'pp.zipcode'),
        ('country', 'pp.country'),
        ('phone', 'pp.phone'),
        ('is_active', 'au.is_active'),
        ('date', 'gm.create_dt'),
    ])
    group_lookups = ','.join(group_mappings.values())
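    # group_lookups is now a comma-separated column list, e.g.
    # "au.id,au.first_name,au.last_name,...,gm.create_dt", which gets
    # substituted into the SELECT statement below.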

    # Use custom SQL to fetch the rows because we need to
    # include each member's profile information, and Django's ORM
    # cannot do that here without issuing a separate profile query
    # per user: pulling 13,000 group members can be done in one
    # ORM query, but their profiles would then require 13,000
    # individual queries :(
    cursor = connection.cursor()
    sql = "SELECT %s FROM user_groups_groupmembership gm \
           INNER JOIN auth_user au ON (au.id = gm.member_id) \
           LEFT OUTER JOIN profiles_profile pp \
           on (pp.user_id = gm.member_id) WHERE group_id = %%s;"

    sql = sql % group_lookups
    cursor.execute(sql, [group.pk])
    values_list = list(cursor.fetchall())

    # index the group key mappings and insert them into the sheet.
    for key in group_mappings.keys():
        if not key in col_index:
            col = len(col_index.keys())
            col_index[key] = col
            sheet.write(0, col, key, style=default_style)

    if values_list:
        # Write the data enumerated to the excel sheet
        for row, row_data in enumerate(values_list):
            for col, val in enumerate(row_data):

                if not row in row_index:
                    # assign the row if it is not yet available
                    row_index[row] = row + 1

                # styles the date/time fields
                if isinstance(val, datetime):
                    style = datetime_style
                elif isinstance(val, date):
                    style = date_style
                else:
                    style = default_style

                sheet.write(row + 1, col, val, style=style)

    #-------------
    # Subscribers
    #-------------
    entries = FieldEntry.objects.filter(
        entry__subscriptions__group=group).distinct()

    for entry in entries:
        val = entry.value
        field = entry.field.label.lower().replace(" ", "_")

        if "subscriber %s" % str(entry.entry.pk) in row_index:
            # get the subscriber's row number
            row = row_index["subscriber %s" % str(entry.entry.pk)]
        else:
            # assign the row if it is not yet available
            row = len(row_index.keys()) + 1
            row_index["subscriber %s" % str(entry.entry.pk)] = row

        if field in col_index:
            # get the entry's col number
            col = col_index[field]
        else:
            # assign the col if it is not yet available
            # and label the new column
            col = len(col_index.keys())
            col_index[field] = col
            sheet.write(0, col, field, style=default_style)

        # styles the date/time fields
        if isinstance(val, datetime):
            style = datetime_style
        elif isinstance(val, date):
            style = date_style
        else:
            style = default_style

        sheet.write(row, col, val, style=style)

    response = HttpResponse(content_type='application/vnd.ms-excel')
    response[
        'Content-Disposition'] = 'attachment; filename=group_%s_all_export.xls' % group.pk
    book.save(response)
    return response
Exemple #43
0
class ObservationRows:
    """Store index file information.

       The ObservationRows class defines a structure to get specific
       information about the spectra out of the index file which was
       produced by the sdfits filler program.

       This is essentially a table of the raw SDFITS file rows, organized
       with a lookup key of scan/feed/window/polarization.

       When rows are added to this object (addRow), the FITS extension,
       row of the FITS table and scan type are stored.

       A list of rows for each scan/feed/window/polarization can be
       retrieved with the 'get' method.

    """
    def __init__(self):
        self.rows = OrderedDict()
        self.Key = namedtuple('key', 'scan, feed, window, polarization')

    def __repr__(self):
        return ('Scans: {0}\nFeeds: {1}\nWindows: {2}\nPols: {3}'.format(self.scans(),
                                                                         self.feeds(),
                                                                         self.windows(),
                                                                         self.pols()))

    def addRow(self, scan, feed, window, polarization,
               fitsExtension, rowOfFitsFile, obsid,
               procname, procscan, nchans):
        """Add rows to the ObservationRows object.

           When rows are added to this object (addRow), the FITS extension,
           row of the FITS table and scan type are stored.

        """

        key = self.Key(scan, feed, window, polarization)

        if key in self.rows:
            self.rows[key]['ROW'].append(rowOfFitsFile)
        else:
            self.rows[key] = {'EXTENSION': fitsExtension,
                              'ROW': [rowOfFitsFile],
                              'OBSID': obsid,
                              'PROCNAME': procname,
                              'PROCSCAN': procscan,
                              'NCHANS': nchans}

    def get(self, scan, feed, window, polarization):
        """Retreive a list of rows for scan/feed/win/pol.

        """
        try:
            key = (scan, feed, window, polarization)
            return self.rows[key]
        except KeyError:
            raise

    def scans(self):
        """Return a list of scans in the observation.

        """
        return sorted(list(set([xx.scan for xx in self.rows.keys()])))

    def feeds(self):
        """Return a list of feeds in the observation.

        """
        return list(set([xx.feed for xx in self.rows.keys()]))

    def windows(self):
        """Return a list of windows in the observation.

        """
        return list(set([xx.window for xx in self.rows.keys()]))

    def pols(self):
        """Return a list of polarizations in the observation.

        """
        return list(set([xx.polarization for xx in self.rows.keys()]))
Exemple #44
0
class Application(object):
    """Poor WSGI application which is called by WSGI server.

    Working of is describe in PEP 0333. This object store route dispatch table,
    and have methods for it's using and of course __call__ method for use
    as WSGI application.
    """

    __instances = []

    def __init__(self, name="__main__"):
        """Application class is per name singleton.

        That means, there could be exist only one instance with same name.
        """

        if Application.__instances.count(name):
            raise RuntimeError('Application with name %s exist yet.' % name)
        Application.__instances.append(name)

        # Application name
        self.__name = name

        # list of pre and post process handlers
        self.__pre = []
        self.__post = []

        # dhandlers table for default handlers on methods {METHOD_GET: handler}
        self.__dhandlers = {}

        # handlers table of simple paths: {'/path': {METHOD_GET: handler}}
        self.__handlers = {}

        self.__filters = {
            ':int': (r'-?\d+', int),
            ':float': (r'-?\d+(\.\d+)?', float),
            ':word': (r'\w+', uni),
            ':hex': (r'[0-9a-fA-F]+', str),
            ':re:': (None, uni),
            'none': (r'[^/]+', uni)
        }

        # handlers of regex paths: {r'/user/([a-z]?)': {METHOD_GET: handler}}
        self.__rhandlers = OrderedDict()

        # http state handlers: {HTTP_NOT_FOUND: {METHOD_GET: my_404_handler}}
        self.__shandlers = {}

        # -- Application variable
        self.__config = {
            'auto_args': True,
            'auto_form': True,
            'auto_json': True,
            'keep_blank_values': 0,
            'strict_parsing': 0,
            'json_content_types': [
                'application/json',
                'application/javascript',
                'application/merge-patch+json'],
            'form_content_types': [
                'application/x-www-form-urlencoded',
                'multipart/form-data'
            ],
            'auto_cookies': True,
            'debug': 'Off',
            'document_root': '',
            'document_index': 'Off',
            'secret_key': '%s%s%s%s' %
                          (__version__, version, getcwd(),
                           ''.join(str(x) for x in uname()))
        }

        try:
            self.__log_level = levels[environ.get('poor_LogLevel',
                                                  'warn').lower()]
        except:
            self.__log_level = LOG_WARNING
            self.log_error('Bad poor_LogLevel, default is warn.', LOG_WARNING)
        # endtry
    # enddef

    def __regex(self, match):
        groups = match.groups()
        _filter = str(groups[1]).lower()

        if _filter in self.__filters:
            regex = self.__filters[_filter][0]
        elif _filter[:4] == ':re:':     # the :re: filter has a user-defined regex
            regex = _filter[4:]
        else:
            try:
                regex = self.__filters[_filter][0]
            except KeyError:
                raise RuntimeError("Undefined route group filter '%s'" %
                                   _filter)

        return "(?P<%s>%s)" % (groups[0], regex)
    # enddef

    def __convertor(self, _filter):
        _filter = str(_filter).lower()
        _filter = ':re:' if _filter[:4] == ':re:' else _filter
        try:
            return self.__filters[_filter][1]
        except KeyError:
            raise RuntimeError("Undefined route group filter '%s'" % _filter)

    @property
    def name(self):
        """Return application name."""
        return self.__name

    @property
    def filters(self):
        """Copy of filter table.

        Filter table contains regular expressions and convert functions,
        see Application.set_filter and Application.route.

        Default filters are:
            :int - match number and convert it to int
            :float - match number and convert it to float
            :word - match one unicoee word
            :hex - match hexadecimal value and convert it to str
            :re: - match user defined regular expression
            none - match any string withount '/' character

        For more details see {/debug-info} page of your application, where
        you see all filters with regular expression definition.
        """
        return self.__filters.copy()

    @property
    def pre(self):
        """Tuple of table with pre-process handlers.

        See Application.pre_process.
        """
        return tuple(self.__pre)

    @property
    def post(self):
        """Tuple of table with post-process handlers.

        See Application.post_process.
        """
        return tuple(self.__post)

    @property
    def dhandlers(self):
        """Copy of table with default handlers.

        See Application.set_default
        """
        return self.__dhandlers.copy()

    @property
    def handlers(self):
        """Copy of table with static handlers.

        See Application.route.
        """
        return self.__handlers.copy()

    @property
    def rhandlers(self):
        """Copy of table with regular expression handlers.

        See Application.route and Application.rroute.
        """
        return self.__rhandlers.copy()

    @property
    def shandlers(self):
        """Copy of table with http state aka error handlers.

        See Application.http_state
        """
        return self.__shandlers.copy()

    @property
    def auto_args(self):
        """Automatic parsing request arguments from uri.

        If it is True (default), Request object do automatic parsing request
        uri to its args variable.
        """
        return self.__config['auto_args']

    @auto_args.setter
    def auto_args(self, value):
        self.__config['auto_args'] = bool(value)

    @property
    def auto_form(self):
        """Automatic parsing arguments from request body.

        If it is True (default) and method is POST, PUT or PATCH, and
        request content type is one of form_content_types, Request
        object do automatic parsing request body to its form variable.
        """
        return self.__config['auto_form']

    @auto_form.setter
    def auto_form(self, value):
        self.__config['auto_form'] = bool(value)

    @property
    def auto_json(self):
        """Automatic parsing JSON from request body.

        If it is True (default), method is POST, PUT or PATCH and request
        content type is one of json_content_types, Request object do
        automatic parsing request body to json variable.
        """
        return self.__config['auto_json']

    @auto_json.setter
    def auto_json(self, value):
        self.__config['auto_json'] = bool(value)

    @property
    def auto_cookies(self):
        """Automatic parsing cookies from request headers.

        If it is True (default) and Cookie request header was set,
        SimpleCookie object was paresed to Request property cookies.
        """
        return self.__config['auto_cookies']

    @auto_cookies.setter
    def auto_cookies(self, value):
        self.__config['auto_cookies'] = bool(value)

    @property
    def debug(self):
        """Application debug as another way how to set poor_Debug.

        This setting will be rewrite by poor_Debug environment variable.
        """
        return self.__config['debug'] == 'On'

    @debug.setter
    def debug(self, value):
        self.__config['debug'] = 'On' if bool(value) else 'Off'

    @property
    def document_root(self):
        """Application document_root as another way how to set poor_DocumentRoot.

        This setting will be rewrite by poor_DocumentRoot environ variable.
        """
        return self.__config['document_root']

    @document_root.setter
    def document_root(self, value):
        self.__config['document_root'] = value

    @property
    def document_index(self):
        """Application document_root as another way how to set poor_DocumentRoot.

        This setting will be rewrite by poor_DocumentRoot environ variable.
        """
        return self.__config['document_index'] == 'On'

    @document_index.setter
    def document_index(self, value):
        self.__config['document_index'] = 'On' if bool(value) else 'Off'

    @property
    def secret_key(self):
        """Application secret_key could be replace by poor_SecretKey in request.

        Secret key is used by PoorSession class. It is generate from
        some server variables, and the best way is set to your own long
        key."""
        return self.__config['secret_key']

    @secret_key.setter
    def secret_key(self, value):
        self.__config['secret_key'] = value

    @property
    def keep_blank_values(self):
        """Keep blank values in request arguments.

        If it is 1 (0 is default), automatic parsing request uri or body
        keep blank values as empty string.
        """
        return self.__config['keep_blank_values']

    @keep_blank_values.setter
    def keep_blank_values(self, value):
        self.__config['keep_blank_values'] = int(value)

    @property
    def strict_parsing(self):
        """Strict parse request arguments.

        If it is 1 (0 is default), automatic parsing request uri or body
        raise with exception on parsing error.
        """
        return self.__config['strict_parsing']

    @strict_parsing.setter
    def strict_parsing(self, value):
        self.__config['strict_parsing'] = int(value)

    @property
    def json_content_types(self):
        """Copy of json content type list.

        Containt list of strings as json content types, which is use for
        testing, when automatics Json object is create from request body.
        """
        return self.__config['json_content_types']

    @property
    def form_content_types(self):
        """Copy of form content type list.

        Containt list of strings as form content types, which is use for
        testing, when automatics Form object is create from request body.
        """
        return self.__config['form_content_types']

    def set_filter(self, name, regex, convertor=uni):
        """Create new filter or overwrite builtins.

        Arguments:
            name      - Name of filter which is used in route or set_route
                        method.
            regex     - regular expression which used for filter
            convertor - convertor function or class, which gets unicode in
                        input. Default is uni function, which is wrapper
                        to unicode string.

            app.set_filter('uint', r'\d+', int)
        """
        name = ':'+name if name[0] != ':' else name
        self.__filters[name] = (regex, convertor)

    def pre_process(self):
        """Append pre process hendler.

        This is decorator for function to call before each request.

            @app.pre_process()
            def before_each_request(req):
                ...
        """
        def wrapper(fn):
            self.__pre.append(fn)
            return fn
        return wrapper
    # enddef

    def add_pre_process(self, fn):
        """Append pre proccess handler.

        Method adds function to list functions which is call before each
        request.

            app.add_pre_process(before_each_request)
        """
        self.__pre.append(fn)
    # enddef

    def post_process(self):
        """Append post process handler.

        This decorator append function to be called after each request,
        if you want to use it redefined all outputs.

            @app.pre_process()
            def after_each_request(req):
                ...
        """
        def wrapper(fn):
            self.__post.append(fn)
            return fn
        return wrapper
    # enddef

    def add_post_process(self, fn):
        """Append post process handler.

        Method for direct append function to list functions which are called
        after each request.

            app.add_post_process(after_each_request)
        """
        self.__post.append(fn)
    # enddef

    def default(self, method=METHOD_HEAD | METHOD_GET):
        """Set default handler.

        This is decorator for default handler for http method (called before
        error_not_found).

            @app.default(METHOD_GET_POST)
            def default_get_post(req):
                # this function will be called if no uri match in internal
                # uri table with method. It's similar like not_found error,
                # but without error
                ...
        """
        def wrapper(fn):
            self.set_default(fn, method)
        return wrapper
    # enddef

    def set_default(self, fn, method=METHOD_HEAD | METHOD_GET):
        """Set default handler.

        Set fn default handler for http method called befor error_not_found.

            app.set_default(default_get_post, METHOD_GET_POST)
        """
        for m in methods.values():
            if method & m:
                self.__dhandlers[m] = fn
    # enddef

    def pop_default(self, method):
        """Pop default handler for method."""
        return self.__dhandlers.pop(method)

    def route(self, uri, method=METHOD_HEAD | METHOD_GET):
        """Wrap function to be handler for uri and specified method.

        You can define uri as static path or as groups which are hand
        to handler as next parameters.

            # static uri
            @app.route('/user/post', method=METHOD_POST)
            def user_create(req):
                ...

            # group regular expression
            @app.route('/user/<name>')
            def user_detail(req, name):
                ...

            # group regular expression with filter
            @app.route('/<surname:word>/<age:int>')
            def surnames_by_age(req, surname, age):
                ...

            # group with own regular expression filter
            @app.route('/<car:re:\w+>/<color:re:#[\da-fA-F]+>')
            def car(req, car, color):
                ...

        If you need to use a group name that is a Python keyword, like class,
        you can use the **kwargs syntax:

            @app.route('/<class>/<len:int>')
            def classes(req, **kwargs):
                return "'%s' class is %d in length." % \
                    (kwargs['class'], kwargs['len'])

        Mind the order in which you call this decorator or the set_route
        function with group regular expressions. Regular expression routes
        are checked in the same order in which they were added to the
        internal table, and the first match stops any further searching. In
        fact, when groups are detected, the uri is converted to a normal
        regular expression and added to the second internal table.
        """
        def wrapper(fn):
            self.set_route(uri, fn, method)
            return fn
        return wrapper
    # enddef

    def set_route(self, uri, fn, method=METHOD_HEAD | METHOD_GET):
        """Set handler for uri and method.

        Another way to add fn as handler for uri. See Application.route
        documentation for details.

            app.set_route('/use/post', user_create, METHOD_POST)
        """
        uri = uni(uri)

        if re_filter.search(uri):
            r_uri = re_filter.sub(self.__regex, uri) + '$'
            convertors = tuple((g[0], self.__convertor(g[1]))
                               for g in (m.groups()
                               for m in re_filter.finditer(uri)))
            self.set_rroute(r_uri, fn, method, convertors)
        else:
            if uri not in self.__handlers:
                self.__handlers[uri] = {}
            for m in methods.values():
                if method & m:
                    self.__handlers[uri][m] = fn
    # enddef

    def pop_route(self, uri, method):
        """Pop handler for uri and method from handers table.

        Method must be define unique, so METHOD_GET_POST could not be use.
        If you want to remove handler for both methods, you must call pop route
        for each method state.
        """
        uri = uni(uri)

        if re_filter.search(uri):
            r_uri = re_filter.sub(self.__regex, uri) + '$'
            return self.pop_rroute(r_uri, method)
        else:
            handlers = self.__handlers.get(uri, {})
            rv = handlers.pop(method)
            if not handlers:    # is empty
                self.__handlers.pop(uri, None)
            return rv

    def is_route(self, uri):
        """Check if uri have any registered record."""
        uri = uni(uri)
        if re_filter.search(uri):
            r_uri = re_filter.sub(self.__regex, uri) + '$'
            return self.is_rroute(r_uri)
        return uri in self.__handlers

    def rroute(self, ruri, method=METHOD_HEAD | METHOD_GET):
        """Wrap function to be handler for uri defined by regular expression.

        Both of function, rroute and set_rroute store routes to special
        internal table, which is another to table of static routes.

            @app.rroute(r'/user/\w+')               # simple regular expression
            def any_user(req):
                ...

            @app.rroute(r'/user/(?P<user>\w+)')     # regular expression with
            def user_detail(req, user):             # groups
                ...

        Mind the order in which you call this decorator or the set_rroute
        function. Regular expression routes are checked in the same order in
        which they were added to the internal table, and the first match
        stops any further searching.
        """
        def wrapper(fn):
            self.set_rroute(ruri, fn, method)
            return fn
        return wrapper
    # enddef

    def set_rroute(self, r_uri, fn, method=METHOD_HEAD | METHOD_GET,
                   convertors=()):
        """Set hanlder for uri defined by regular expression.

        Another way to add fn as handler for uri defined by regular expression.
        See Application.rroute documentation for details.

            app.set_rroute('/use/\w+/post', user_create, METHOD_POST)

        This method is internally use, when groups are found in static route,
        adding by route or set_route method.
        """
        r_uri = re.compile(r_uri, re.U)
        if r_uri not in self.__rhandlers:
            self.__rhandlers[r_uri] = {}
        for m in methods.values():
            if method & m:
                self.__rhandlers[r_uri][m] = (fn, convertors)
    # enddef

    def pop_rroute(self, r_uri, method):
        """Pop handler and convertors for uri and method from handlers table.

        For mor details see Application.pop_route.
        """
        r_uri = re.compile(r_uri, re.U)
        handlers = self.__rhandlers.get(r_uri, {})
        rv = handlers.pop(method)
        if not handlers:    # is empty
            self.__rhandlers.pop(r_uri, None)
        return rv

    def is_rroute(self, r_uri):
        """Check if regular expression uri have any registered record."""
        r_uri = re.compile(r_uri, re.U)
        return r_uri in self.__rhandlers

    def http_state(self, code, method=METHOD_HEAD | METHOD_GET | METHOD_POST):
        """Wrap function to handle http status codes like http errors."""
        def wrapper(fn):
            self.set_http_state(code, fn, method)
        return wrapper
    # enddef

    def set_http_state(self, code, fn,
                       method=METHOD_HEAD | METHOD_GET | METHOD_POST):
        """Set fn as handler for http state code and method."""
        if code not in self.__shandlers:
            self.__shandlers[code] = {}
        for m in methods.values():
            if method & m:
                self.__shandlers[code][m] = fn
    # enddef

    def pop_http_state(self, code, method):
        """Pop handerl for http state and method.

        As Application.pop_route, for pop multimethod handler, you must call
        pop_http_state for each method.
        """
        handlers = self.__shandlers.get(code, {})
        return handlers.pop(method)

    def error_from_table(self, req, code):
        """Internal method, which is called if error was accured.

        If status code is in Application.shandlers (fill with http_state
        function), call this handler.
        """
        if code in self.__shandlers \
                and req.method_number in self.__shandlers[code]:
            try:
                handler = self.__shandlers[code][req.method_number]
                if 'uri_handler' not in req.__dict__:
                    req.uri_rule = '_%d_error_handler_' % code
                    req.uri_handler = handler
                self.handler_from_pre(req)       # call pre handlers now
                handler(req)
            except:
                internal_server_error(req)
        elif code in default_shandlers:
            handler = default_shandlers[code][METHOD_GET]
            handler(req)
        else:
            not_implemented(req, code)
    # enddef

    def handler_from_default(self, req):
        """Internal method, which is called if no handler is found."""
        if req.method_number in self.__dhandlers:
            req.uri_rule = '_default_handler_'
            req.uri_handler = self.__dhandlers[req.method_number]
            self.handler_from_pre(req)       # call pre handlers now
            retval = self.__dhandlers[req.method_number](req)
            if retval != DECLINED:
                raise SERVER_RETURN(retval)
    # enddef

    def handler_from_pre(self, req):
        """Internal method, which run all pre (pre_proccess) handlers.

        This method was call before end-point route handler.
        """
        for fn in self.__pre:
            fn(req)

    def handler_from_table(self, req):
        """Call right handler from handlers table (fill with route function).

        If no handler is fined, try to find directory or file if Document Root,
        resp. Document Index is set. Then try to call default handler for right
        method or call handler for status code 404 - not found.
        """

        # static routes
        if req.uri in self.__handlers:
            if req.method_number in self.__handlers[req.uri]:
                handler = self.__handlers[req.uri][req.method_number]
                req.uri_rule = req.uri      # nice variable for pre handlers
                req.uri_handler = handler
                self.handler_from_pre(req)  # call pre handlers now
                retval = handler(req)       # call right handler now
                # return text is allowed
                if isinstance(retval, str) \
                        or (_unicode_exist and isinstance(retval, unicode)):
                    req.write(retval, 1)    # write data and flush
                    retval = DONE
                if retval != DECLINED:
                    raise SERVER_RETURN(retval or DONE)  # could be state.DONE
            else:
                raise SERVER_RETURN(HTTP_METHOD_NOT_ALLOWED)
            # endif
        # endif

        # regular expression
        for ruri in self.__rhandlers.keys():
            match = ruri.match(req.uri)
            if match and req.method_number in self.__rhandlers[ruri]:
                handler, convertors = self.__rhandlers[ruri][req.method_number]
                req.uri_rule = ruri.pattern  # nice variable for pre handlers
                req.uri_handler = handler
                self.handler_from_pre(req)   # call pre handlers now
                if len(convertors):
                    # create an OrderedDict from the match instead of a dict
                    # so the convertors can be applied in order
                    req.groups = OrderedDict(
                        (g, c(v))for ((g, c), v) in zip(convertors,
                                                        match.groups()))
                    retval = handler(req, *req.groups.values())
                else:
                    req.groups = match.groupdict()
                    retval = handler(req, *match.groups())
                # return text is allowed
                if isinstance(retval, str) \
                        or (_unicode_exist and isinstance(retval, unicode)):
                    req.write(retval, 1)    # write data and flush
                    retval = DONE
                if retval != DECLINED:
                    raise SERVER_RETURN(retval or DONE)  # could be state.DONE
            # endif - no METHOD_NOT_ALLOWED here
        # endfor

        # try file or index
        if req.document_root():
            rfile = "%s%s" % (uni(req.document_root()),
                              path.normpath("%s" % uni(req.uri)))

            if not path.exists(rfile):
                if req.debug and req.uri == '/debug-info':      # work if debug
                    req.uri_rule = '_debug_info_'
                    req.uri_handler = debug_info
                    self.handler_from_pre(req)  # call pre handlers now
                    raise SERVER_RETURN(debug_info(req, self))
                self.handler_from_default(req)                  # try default
                raise SERVER_RETURN(HTTP_NOT_FOUND)             # not found

            # return file
            if path.isfile(rfile) and access(rfile, R_OK):
                req.uri_rule = '_send_file_'
                req.uri_handler = send_file
                self.handler_from_pre(req)      # call pre handlers now
                req.log_error("Return file: %s" % req.uri, LOG_INFO)
                raise SERVER_RETURN(send_file(req, rfile))

            # return directory index
            if req.document_index and path.isdir(rfile) \
                    and access(rfile, R_OK):
                req.log_error("Return directory: %s" % req.uri, LOG_INFO)
                req.uri_rule = '_directory_index_'
                req.uri_handler = directory_index
                self.handler_from_pre(req)      # call pre handlers now
                raise SERVER_RETURN(directory_index(req, rfile))

            raise SERVER_RETURN(HTTP_FORBIDDEN)
        # endif

        if req.debug and req.uri == '/debug-info':
            req.uri_rule = '_debug_info_'
            req.uri_handler = debug_info
            self.handler_from_pre(req)          # call pre handlers now
            raise SERVER_RETURN(debug_info(req, self))

        self.handler_from_default(req)

        req.log_error("404 Not Found: %s" % req.uri, LOG_ERR)
        raise SERVER_RETURN(HTTP_NOT_FOUND)
    # enddef

    def __request__(self, environ, start_response):
        """Create Request instance and return wsgi response.

        This method create Request object, call handlers from
        Application.__pre (Application.handler_from_pre),
        uri handler (handler_from_table), default handler
        (Application.handler_from_default) or error handler
        (Application.error_from_table), and handlers from
        Application.__post.
        """
        req = Request(environ, start_response, self.__config)

        try:
            self.handler_from_table(req)
        except SERVER_RETURN as e:
            code = e.args[0]
            if code in (OK, HTTP_OK, DONE):
                pass
            # XXX: elif code in (HTTP_MOVED_PERMANENTLY,
            #                    HTTP_MOVED_TEMPORARILY):
            else:
                req.status = code
                self.error_from_table(req, code)
        except (BrokenClientConnection, SystemExit) as e:
            req.log_error(str(e), LOG_ERR)
            req.log_error('   ***   You should ignore the next error   ***',
                          LOG_ERR)
            return ()
        except:
            self.error_from_table(req, 500)
        # endtry

        try:    # call post_process handler
            for fn in self.__post:
                fn(req)
        except:
            self.error_from_table(req, 500)
        # endtry

        return req.__end_of_request__()    # private call of request
    # enddef

    def __call__(self, environ, start_response):
        """Callable define for Application instance.

        This method run __request__ method.
        """
        if self.__name == '__poorwsgi__':
            stderr.write("[W] Using deprecated instance of Application.\n")
            stderr.write("    Please, create your own instance\n")
            stderr.flush()
        return self.__request__(environ, start_response)

    def __profile_request__(self, environ, start_response):
        """Profiler version of __request__.

        This method is used if set_profile is used."""
        def wrapper(rv):
            rv.append(self.__original_request__(environ, start_response))

        rv = []
        uri_dump = (self._dump + environ.get('PATH_INFO').replace('/', '_')
                    + '.profile')
        self.log_error('Generate %s' % uri_dump, LOG_INFO)
        self._runctx('wrapper(rv)', globals(), locals(), filename=uri_dump)
        return rv[0]
    # enddef

    def __repr__(self):
        return '%s - callable Application class instance' % self.__name

    def set_profile(self, runctx, dump):
        """Set profiler for __call__ function.

        Arguments:
            runctx - function from profiler module
            dump - path and prefix for .profile files

        Typical usage:

            import cProfile

            cProfile.runctx('from simple import *', globals(), locals(),
                            filename="log/init.profile")
            app.set_profile(cProfile.runctx, 'log/req')
        """
        self._runctx = runctx
        self._dump = dump

        self.__original_request__ = self.__request__
        self.__request__ = self.__profile_request__
    # enddef

    def del_profile(self):
        """Remove profiler from application."""
        self.__request__ = self.__original_request__

    def get_options(self):
        """Returns dictionary with application variables from system environment.

        Application variables start with {app_} prefix,
        but in returned dictionary is set without this prefix.

            #!ini
            poor_LogLevel = warn        # Poor WSGI variable
            app_db_server = localhost   # application variable db_server
            app_templates = app/templ   # application variable templates

        This method works like Request.get_options, but work with
        os.environ, so it works only with wsgi servers, which set not only
        request environ, but os.environ too. Apaches mod_wsgi don't do that,
        uWsgi and PoorHTTP do that.
        """
        options = {}
        for key, val in environ.items():
            key = key.strip()
            if key[:4].lower() == 'app_':
                options[key[4:].lower()] = val.strip()
        return options
    # enddef

    def log_error(self, message, level=LOG_ERR):
        """Logging method with the same functionality like in Request object.

        But as get_options read configuration from os.environ which could
        not work in same wsgi servers like Apaches mod_wsgi.

        This method write to stderr so messages, could not be found in
        servers error log!
        """
        if self.__log_level[0] >= level[0]:
            if _unicode_exist and isinstance(message, unicode):
                message = message.encode('utf-8')
            try:
                stderr.write("<%s> [%s] %s\n" % (level[1], self.__name,
                                                 message))
            except UnicodeEncodeError:
                if _unicode_exist:
                    message = message.decode('utf-8').encode(
                        'ascii', 'backslashreplace')
                else:
                    message = message.encode(
                        'ascii', 'backslashreplace').decode('ascii')

                stderr.write("<%s> [%s] %s\n" % (level[1], self.__name,
                                                 message))
            stderr.flush()
    # enddef

    def log_info(self, message):
        """Logging method, which create message as LOG_INFO level."""
        self.log_error(message, LOG_INFO)

    def log_debug(self, message):
        """Logging method, which create message as LOG_DEBUG level."""
        self.log_error(message, LOG_DEBUG)

    def log_warning(self, message):
        """Logging method, which create message as LOG_WARNING level."""
        self.log_error(message, LOG_WARNING)
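
# Hedged usage sketch (added for illustration; not part of the original
# module). It assumes the METHOD_* constants and helpers imported at the top
# of the module, plus any WSGI server; the handler name, route and port are
# made up.
if __name__ == '__main__':
    from wsgiref.simple_server import make_server

    example_app = Application('example')

    @example_app.route('/hello/<name:word>')
    def hello(req, name):
        # returning a str is allowed; handler_from_table writes it back
        return "Hello, %s!" % name

    make_server('127.0.0.1', 8080, example_app).serve_forever()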
Exemple #45
0
def include_schema():

    for schema, value in globals.edg_schema.items()[:]:
        schema_data = OrderedDict()
        schema_files = [
            globals.edg_conf['conf']['metadata_info'][schema]['schema_path']
        ]

        # Continue to next schema if this schema has no other include files
        if not value.get('include_schema'):
            continue
        # Check for self inclusion
        if list(set(schema_files) & set(value.get('include_schema'))):
            print "Error: Cannot self include schema in %s : schema_include" % schema
            return False

        # Check for duplicate inclusion
        schema_files = value.get('include_schema')
        if len(list(set(schema_files))) != len(schema_files):
            print "Error: File duplication in %s : schema_include list " % schema
            return False

        for schema_file in schema_files:
            # Check whether file exist or not
            try:
                with open(schema_file) as f:
                    pass
            except IOError as e:
                print 'Error: Included schema file "%s" does not exist' % schema_file
                return False

            # Parse JSON data in schema file
            schema_data_json_file = open(schema_file)
            str_data = bytearray(schema_data_json_file.read()).decode("utf-8")
            schema_data_json = re.sub(r'<Application>',
                                      globals.edg_schema[schema]['name'],
                                      str_data)
            try:
                schema_data[schema_file] = json.loads(
                    schema_data_json, object_pairs_hook=OrderedDict)
                schema_data_json_file.close()
            except ValueError as e:
                print "Error parsing JSON in included schema file : ", schema_file
                print e
                return False

            print "Include Schema File " + schema_file + " Parsed"

            # Check for more than one level of include
            if schema_data[schema_file].get('include_schema'):
                print "Error: more than one level of schema nesting in", schema, ": include_schema :", schema_file
                return False

            # Schema structure validation and expansion
            if 'name' not in schema_data[schema_file]:
                print schema_file, "schema does not contain mandatory 'name' tag"
                return False
            if 'fields' not in schema_data[schema_file]:
                print schema_file, "schema does not contain mandatory 'fields' tag"
                return False

            if not validate_fields(schema_file,
                                   schema_data[schema_file]['fields']):
                return False

        # Tweak for maintaining order of included fields
        ordered_fields = OrderedDict()
        for schema_file in schema_files:
            key_intersect = list(
                set(ordered_fields.keys())
                & set(schema_data[schema_file]['fields'].keys()))
            if (key_intersect):
                print "Following duplicate keys found in nested schema file: ", schema_file, "in", schema
                print key_intersect
                return False

            ordered_fields.update(schema_data[schema_file]['fields'])

        # Now add fields of the Top schema
        key_intersect = list(
            set(ordered_fields.keys()) & set(value['fields'].keys()))
        if (key_intersect):
            print "Following duplicate keys found in top level schema file of", schema
            print key_intersect
            return False

        ordered_fields.update(value['fields'])

        globals.edg_schema[schema]['fields'] = ordered_fields
        #value['fields'].update(schema_data[schema_file]['fields'])

    return True
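
# Hedged illustration (an assumption, not from the original source) of the
# data shape include_schema() expects: each entry in globals.edg_schema may
# list extra JSON schema files whose 'fields' are merged in, in order, before
# the schema's own fields.
#
#     globals.edg_schema['orders'] = OrderedDict([
#         ('name', 'orders'),
#         ('include_schema', ['schemas/common.json']),
#         ('fields', OrderedDict([('order_id', {'type': 'int'})])),
#     ])
#
# schemas/common.json itself must contain "name" and "fields" keys and may
# not contain a further "include_schema" (only one level of nesting is
# allowed).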
Exemple #46
0
class Book:
    """
    Read an excel book that has mutliple sheets

    For csv file, there will be just one sheet
    """
    def __init__(self, filename=None, **keywords):
        """
        Book constructor

        Selects how to load the book according to the filename extension
        """
        self.path = ""
        self.filename = "memory"
        self.name_array = []
        self.sheets = {}
        if is_string(type(filename)):
            if filename and os.path.exists(filename):
                self.load_from(filename, **keywords)
        elif isinstance(filename, tuple):
            self.load_from_memory(filename, **keywords)

    def load_from(self, file, **keywords):
        """Load content from physical file

        :param str file: the file name
        :param any keywords: additional parameters
        """
        path, filename = os.path.split(file)
        self.path = path
        self.filename = filename
        book = load_file(file, **keywords)
        sheets = book.sheets()
        self.load_from_sheets(sheets)

    def load_from_memory(self, the_tuple, **keywords):
        """Load content from memory content

        :param tuple the_tuple: first element should be file extension,
        second element should be file content
        :param any keywords: additional parameters
        """
        book = load_file(the_tuple, **keywords)
        sheets = book.sheets()
        self.load_from_sheets(sheets)

    def load_from_sheets(self, sheets):
        """Load content from existing sheets

        :param dict sheets: a dictionary of sheets. Each sheet is
        a list of lists
        """
        self.sheets = OrderedDict()
        for name in sheets.keys():
            self.sheets[name] = self.get_sheet(sheets[name], name)
        self.name_array = list(self.sheets.keys())

    def get_sheet(self, array, name):
        """Create a sheet from a list of lists"""
        return Sheet(array, name)

    def __iter__(self):
        return SheetIterator(self)

    def number_of_sheets(self):
        """Return the number of sheets"""
        return len(self.name_array)

    def sheet_names(self):
        """Return all sheet names"""
        return self.name_array

    def sheet_by_name(self, name):
        """Get the sheet with the specified name"""
        return self.sheets[name]

    def sheet_by_index(self, index):
        """Get the sheet with the specified index"""
        if index < len(self.name_array):
            sheet_name = self.name_array[index]
            return self.sheets[sheet_name]

    def remove_sheet(self, sheet):
        if isinstance(sheet, int):
            if sheet < len(self.name_array):
                sheet_name = self.name_array[sheet]
                del self.sheets[sheet_name]
                self.name_array = list(self.sheets.keys())
            else:
                raise IndexError
        elif isinstance(sheet, str):
            if sheet in self.name_array:
                del self.sheets[sheet]
                self.name_array = list(self.sheets.keys())
            else:
                raise KeyError
        else:
            raise TypeError

    def __getitem__(self, key):
        if type(key) == int:
            return self.sheet_by_index(key)
        else:
            return self.sheet_by_name(key)

    def __delitem__(self, other):
        self.remove_sheet(other)
        return self

    def __add__(self, other):
        """Operator overloading

        example::

            book3 = book1 + book2
            book3 = book1 + book2["Sheet 1"]

        """
        content = {}
        a = to_dict(self)
        for k in a.keys():
            new_key = k
            if len(a.keys()) == 1:
                new_key = "%s_%s" % (self.filename, k)
            content[new_key] = a[k]
        if isinstance(other, Book):
            b = to_dict(other)
            for l in b.keys():
                new_key = l
                if len(b.keys()) == 1:
                    new_key = other.filename
                if new_key in content:
                    uid = uuid.uuid4().hex
                    new_key = "%s_%s" % (l, uid)
                content[new_key] = b[l]
        elif isinstance(other, Sheet):
            new_key = other.name
            if new_key in content:
                uid = uuid.uuid4().hex
                new_key = "%s_%s" % (other.name, uid)
            content[new_key] = other.array
        else:
            raise TypeError
        c = Book()
        c.load_from_sheets(content)
        return c

    def __iadd__(self, other):
        """Operator overloading +=

        example::

            book += book2
            book += book2["Sheet1"]
        
        """
        if isinstance(other, Book):
            names = other.sheet_names()
            for name in names:
                new_key = name
                if len(names) == 1:
                    new_key = other.filename
                if new_key in self.name_array:
                    uid = uuid.uuid4().hex
                    new_key = "%s_%s" % (name, uid)
                self.sheets[new_key] = self.get_sheet(other[name].array,
                                                      new_key)
        elif isinstance(other, Sheet):
            new_key = other.name
            if new_key in self.name_array:
                uid = uuid.uuid4().hex
                new_key = "%s_%s" % (other.name, uid)
            self.sheets[new_key] = self.get_sheet(other.array, new_key)
        else:
            raise TypeError
        self.name_array = list(self.sheets.keys())
        return self
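
# Hedged usage sketch (added for illustration; not part of the original
# module). It relies on the Sheet/SheetIterator helpers this class imports
# elsewhere; the sheet data below is made up.
if __name__ == '__main__':
    book = Book()                        # empty in-memory book
    book.load_from_sheets({"Sheet 1": [[1, 2], [3, 4]],
                           "Sheet 2": [[5, 6]]})
    print(book.sheet_names())            # ['Sheet 1', 'Sheet 2']
    print(book.number_of_sheets())       # 2
    first = book[0]                      # same as book.sheet_by_index(0)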
def callservice(conn, schemaname, servicename, querystring):
    try:  
        t1 = datetime.datetime.now()
        # log the request - not enabled at the moment because of permission issues
#         logging.basicConfig(filename='/srv/www/dopa-services/cgi-bin/logs/REST_Services_Log.log', level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s',)
#         logging.info("REST REQUEST: " + web.ctx.home + web.ctx.path + web.ctx.query)
        # PARSE THE STANDARD OPTIONAL INPUT PARAMETERS
        # get the input parameters
        params = getQueryStringParams(querystring)  # the unquoting handles encoded parameters (e.g. from extJS, where "1,2,3" arrives as "1%2C2%2C3")
        # get the standard optional parameters from the url 
        format = params.setdefault('format', 'json') 
        fields = params.setdefault('fields', '').split(",")  # fields will be passed as an array, e.g. iucn_species_id,wdpa_id
        includemetadata = params.setdefault('includemetadata', 'true')
        metadataName = params.setdefault('metadataname', 'metadata')
        rootName = params.setdefault('rootname', 'records')
        parseparams = params.setdefault('parseparams', 'true')
        sortField = params.setdefault('sortfield', '')
        decimalPlaceLimit = params.setdefault('dplimit', '2')
        isHadoop = ('true' if (servicename[-2:] == '_h') else 'false')  # if the service is a call to a hadoop method then set a flag 

        # remove the standard optional parameters from the dictionary so we are left with just the parameters required for the function
        del (params['format'], params['fields'], params['includemetadata'], params['parseparams'], params['metadataname'], params['rootname'], params['sortfield'], params['dplimit'])
        if 'callback' in params.keys():
            del(params['callback'])
        # check if the service name is valid
        if not (isValidServiceName(servicename)):
            raise RESTServicesError('Invalid servicename')
        
        # authorise with ecas if needed
#         if requiresAuthentication(servicename):
#             if isAuthenticated() == False:
#                 web.ctx.status = '401 Unauthorized'
#                 web.header("Content-Type", "text/html")
#                 return "<html><head></html><body><h1>Authentication required</h1></body></html>"

        # if it is a Hadoop query then we need to run if first before we actually use the values to get the data from postgresql 
        if (isHadoop.lower() == 'true'): 
            hadoopData = runHadoopQuery(conn, servicename, params)
            if hadoopData == '[]': hadoopData = '[-1]'
            servicename = "_" + servicename  # now call the postgresql function
            params.clear()
            params['species_ids'] = str(hadoopData)[1:-1];

        # PARSE AND CONVERT THE DATA TYPES OF THE OTHER INPUT PARAMETERS
        # get all the parameters for the function from postgresql
        conn.cur.callproc('utils.dopa_rest_getparams', [servicename])
        # get the function parameters as a string and split this into a list, e.g. wdpa_id integer, presence_id integer[] -->  ['wdpa_id integer', ' presence_id integer[]']
        functionparams = conn.cur.fetchone()
        hasparams = True if functionparams[0] else False
        if hasparams:
            functionparams = functionparams[0].split(',')  
            # get the names of the function parameters which are array types
            arrayparamnames = [p.strip().split(" ")[0] for p in functionparams if '[' in p]
            # convert the array values into lists
            for key in params.keys():
                if key in arrayparamnames:
                    strlist = params[key].split(",")
                    isnum = isNumeric(strlist[0])
                    if isnum:
                        params[key] = [int(s) for s in strlist]
                    else:
                        params[key] = strlist
            # get the full list of function parameter names
            functionparamnames = [p.strip().split(" ")[0] for p in functionparams]
            # check that all parameters are correct
            invalidparamnames = [n for n in params.keys() if n not in functionparamnames]
            if invalidparamnames and parseparams == 'true':
                raise RESTServicesError('Invalid parameters: ' + ",".join(invalidparamnames))
            # put the input parameters in the right order 
            params = OrderedDict([(n, params[n]) for n in functionparamnames if n in params.keys()])
            
        # GET THE SORT CLAUSE
        if sortField != "":
            sortClause = ' ORDER BY "' + sortField + '"'
        else:
            sortClause = ""
            
        # GET THE FIELDS CLAUSE
        if fields != ['']:
            fieldsClause = ",".join(fields)
        else:
            fieldsClause = "*"
        
        # RUN THE QUERY
        if hasparams :
            sql = "SELECT " + fieldsClause + " from " + schemaname + "." + servicename + "(" + ",".join([n + ":=%(" + n + ")s" for n in params]) + ")" + sortClause + ";"  # run the query using named parameters
            conn.cur.execute(sql, params)
        else:
            sql = "SELECT * from " + schemaname + "." + servicename + "()" + sortClause + ";" 
            conn.cur.execute(sql)  
        rows = conn.cur.fetchall()

        # PROCESS THE ROWS AND WRITE THEM BACK TO THE CLIENT
        conn.cur.close()
        t2 = datetime.datetime.now()
        
        # METADATA SECTION OF RESPONSE
        allfields = [d.name for d in conn.cur.description]
        if (fields == ['']): fields = allfields 
        fieldcount = len(fields)
        fieldsdict = [dict([("name", d.name), ("type", gettypefromtypecode(d.type_code))]) for d in conn.cur.description if (d.name in fields)]
        if len(fieldsdict) != len(fields):
            raise RESTServicesError('Invalid output fields')
        metadatadict = OrderedDict([("duration", str(t2 - t1)), ("error", None), ("idProperty", conn.cur.description[0].name), ("successProperty", 'success'), ("totalProperty", 'recordCount'), ("success", True), ("recordCount", int(conn.cur.rowcount)), ("root", rootName), ("fields", fieldsdict)])    
        
        # RECORDS SECTION OF THE RESPONSE
        # parse the float values and round them to the number of decimal places given by decimalPlaceLimit - don't include lat/lng fields as these must keep more decimal places
        floatColumns = [i for i, d in enumerate(fieldsdict) if d['type'] == 'float' and d['name'] not in ['lat', 'lng']]
        if len(floatColumns) > 0:
            for floatColumn in floatColumns:
                for row in rows:
                    if type(row[floatColumn]) != NoneType:  # check that the data is not null
                        row[floatColumn] = round(row[floatColumn], int(decimalPlaceLimit))
            
        # return the data
        colsRequired = [allfields.index(field) for field in fields]
        if format in ['json', 'array']:
            if format == 'json':
                recordsdict = [OrderedDict([(allfields[col], row[col]) for col in range(fieldcount) if (col in colsRequired)]) for row in rows] 
            else:
                recordsdict = [[row[col] for col in range(fieldcount) if (col in colsRequired)] for row in rows]
            json.encoder.FLOAT_REPR = lambda f: ("%.14g" % f)  # this specifies how many decimal places are returned in the json with float values - currently set to 14 - good enough for returning lat/long coordinates
            if (includemetadata.lower() == 'true'):
                responsejson = json.dumps(dict([(metadataName, metadatadict), (rootName, recordsdict)]), indent=1, cls=CustomJSONEncoder)
            else: 
                responsejson = json.dumps(dict([(rootName, recordsdict)]), indent=1, cls=CustomJSONEncoder)
            return getJsonResponse(responsejson)
        
        elif format in ['xml', 'xmlverbose']:
            root = etree.Element('results')
            recordsnode = etree.Element(rootName)
            recordsdicts = [OrderedDict([(allfields[col], str(row[col]).decode('utf-8')) for col in range(fieldcount) if (col in colsRequired) and str(row[col]) != 'None']) for row in rows ]  #
            if format == 'xml':
                recordselements = [etree.Element('record', element) for element in recordsdicts]
                for recordelement in recordselements:
                    recordsnode.append(recordelement)
            else:
                for recordelement in recordsdicts:
                    record = etree.Element('record')
                    for (n, v) in recordelement.items():
                        el = etree.Element(n)
                        el.text = v
                        record.append(el)
                    recordsnode.append(record)
            root.append(recordsnode)
            web.header("Content-Type", "text/xml")
#             web.header("Content-Type", "application/Excel") # doesnt work!
#             web.header("Content-Disposition", "attachment; filename=test.xml")
            return etree.tostring(root)

        elif format == 'sms':
            _twilio = twilio()
            client = TwilioRestClient(_twilio.twilio_account_sid, _twilio.twilio_auth_token)  # use the twilio api account
            bodystr = 'Hi Andrew - test species data: '
            bodystr = bodystr + str(rows[0])[:160 - len(bodystr)]
            message = client.sms.messages.create(to="+393668084920", from_="+19712647662", body=bodystr)  # my mobile
            return message

        elif format == 'email':
            _amazon_ses = amazon_ses()
            amazonSes = AmazonSES(_amazon_ses.AccessKeyID, _amazon_ses.SecretAccessKey)  # use the amazon simple email service api account
            message = EmailMessage()
            message.subject = 'JRC REST Services Information Request'
            message.bodyHtml = getResultsAsHTML(rows, fieldcount, colsRequired, metadatadict) 
            result = amazonSes.sendEmail('*****@*****.**', '*****@*****.**', message)  # to me
            return result 
                    
        elif format == 'html':
            htmlData = getResultsAsHTML(rows, fieldcount, colsRequired, metadatadict) 
            web.header("Content-Type", "text/html") 
            return "<html><head></head><body>" + htmlData + "</body></html>"
        
        elif format == 'csv':
            data = [[row[col] for col in range(fieldcount) if (col in colsRequired)] for row in rows]
            colnames = ",".join([f["name"] for f in metadatadict["fields"]]) + "\n"
            output = colnames + "\n".join([p for p in [",".join(h) for h in [[getStringValue(col) for col in row] for row in data]]])
            filename = "dataDownload.csv" #hardcoded for now
            f = open(r'/tmp/' + filename, 'wb')
            f.write(output)
            f.close()
            web.header("Content-Type", "text/plain")
            web.header("Content-Disposition", "attachment; filename=%s" % filename)
            return output

        elif format == 'pdf':    
            config = pdfkit.configuration(wkhtmltopdf='/usr/local/bin/wkhtmltopdf')
            web.header("Content-Type", "application/pdf")
            htmlData = getResultsAsHTML(rows, fieldcount, colsRequired, metadatadict)
            return pdfkit.from_string(htmlData.decode('utf8'), False, configuration=config, options={'quiet': '', 'encoding': "UTF-8"})
        
        else:
            raise RESTServicesError('Invalid response format: ' + format)

    except (RESTServicesError, DataError, ProgrammingError, exceptions.TypeError, IndexError, IntegrityError, AmazonError, OperationalError) as e:
#        web.webapi.internalerror() #returns a internal server error 500
        t2 = datetime.datetime.now()
        msg = "There was an error sending the email. Make sure that the email address has been verified in Amazon Simple Email Services" if type(e) == AmazonError else e.message
        logging.error(msg + "\n")
        if type(e) == ProgrammingError:
            if ("column" in e.message) & ("does not exist" in e.message) & (sortField != ""):
                msg = "Invalid sortfield parameter: " + sortField
        return returnError(metadataName, rootName, t2 - t1, msg)
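
# Hedged illustration (an assumption, not from the original source) of a call
# to this service; the connection object, schema and service names, and the
# query string values are placeholders.
#
#     callservice(conn, 'dopa', 'get_species_info',
#                 'iucn_species_id=12392&format=json'
#                 '&fields=iucn_species_id,common_name&sortfield=common_name')
#
# The standard optional parameters (format, fields, includemetadata,
# metadataname, rootname, parseparams, sortfield, dplimit) are stripped from
# the query string; whatever remains is passed as named arguments to the
# schemaname.servicename function in PostgreSQL.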
Exemple #48
0
class Controller(object):
    def __init__(self, args={}):
        self.args = args # arguments from command line
        self.config = {} # config to be processed from .dexy files
        self.docs = []
        self.timing = []
        self.virtual_docs = []

        self.batch_start_time = None
        self.batch_finish_time = None
        self.batch_elapsed_time = None

        # Set up logging
        if args.has_key("logsdir") and args.has_key("logfile"):
            self.log = dexy.utils.get_log("dexy.controller", args['logsdir'], args['logfile'], args['loglevel'])
        else:
            self.log = Constants.NULL_LOGGER

        # Set up db
        if args.has_key('dbclass') and args.has_key("logsdir") and args.has_key("dbfile"):
            self.db = dexy.utils.get_db(self.args['dbclass'], logsdir=self.args['logsdir'], dbfile=args['dbfile'])
        else:
            self.db = None

        # List of directories that reporters use, these will not be processed by dexy
        self.reports_dirs = dexy.introspect.reports_dirs(self.log)

        # list of artifact classes - if nothing else uses this then move
        # it into the if statement below and don't cache it

        self.artifact_classes = dexy.introspect.artifact_classes(self.log)
        if args.has_key('artifactclass'):
            if self.artifact_classes.has_key(args['artifactclass']):
                self.artifact_class = self.artifact_classes[args['artifactclass']]
            else:
                raise dexy.commands.UserFeedback("Artifact class name %s not found in %s" % (args['artifactclass'], ",".join(self.artifact_classes.keys())))


    def run(self):
        """
        This does all the work.
        """
        self.batch_start_time = time.time()
        start = self.batch_start_time

        self.log.debug("populating Document class filter list")
        dexy.document.Document.filter_list = dexy.introspect.filters(self.log)
        self.timing.append(("populate-filter-list", time.time() - start))
        start = time.time()

        self.log.debug("loading config...")
        self.load_config()
        self.log.debug("finished loading config.")
        self.timing.append(("load-config", time.time() - start))
        start = time.time()

        self.log.debug("processing config, populating document list...")
        self.process_config()
        self.log.debug("finished processing config.")
        self.timing.append(("process-config", time.time() - start))
        start = time.time()

        # set the list of documents which are virtual
        self.virtual_docs = [d for d in self.docs if d.virtual]

        try:
            if not self.args['dryrun']:
                [doc.setup() for doc in self.docs]
                self.docs = [doc.run() for doc in self.docs]
        except dexy.commands.UserFeedback as e:
            self.persist()
            raise e

        self.timing.append(("run-docs", time.time() - start))

        self.batch_finish_time = time.time()
        self.batch_elapsed_time = self.batch_finish_time - self.batch_start_time

        self.log.debug("persisting batch info...")
        self.persist()
        self.log.debug("finished persisting.")
        self.log.debug("finished processing. elapsed time %s" % self.batch_elapsed_time)

    def persist(self):
        """
        Persists the database. Saves some information about this batch in a
        JSON file (for use by reporters or for debugging).
        """
        self.db.persist()
        dexy.utils.save_batch_info(self.batch_id, self.batch_info(), self.args['logsdir'])

    def batch_info(self):
        """
        Dict of info to save
        """
        return {
            "id" : self.batch_id,
            "config" : self.config,
            "args" : self.args,
            "docs" : dict((doc.key(), doc.document_info()) for doc in self.docs),
            "start_time" : self.batch_start_time,
            "finish_time" : self.batch_finish_time,
            "elapsed" : self.batch_elapsed_time,
            "timing" : self.timing
            }

    def config_for_directory(self, path):
        """
        Determine the config applicable within a directory by looking in every
        parent directory (up as far as the dexy project root) for config files
        and combining them, such that subdirectories override parents.
        """
        self.log.debug("Determining configuration applicable in %s" % path)

        global_args = {}
        config_dict = {}
        variables = {}
        config_file = self.args['config']

        path_elements = path.split(os.sep)

        for i in range(0,len(path_elements)+1):
            config_path = os.path.join(*(path_elements[0:i] + [config_file]))
            config_files = glob.glob(config_path)

            # Don't propagate virtual files
            for k in config_dict.keys():
                propagate_virtual = config_dict[k].has_key('propagate') and config_dict[k]['propagate']
                if k.startswith("@") and not propagate_virtual:
                    del config_dict[k]

            for f in config_files:
                self.log.info("loading config file %s" % f)

                with open(f, "r") as cf:
                    try:
                        json_dict = json.load(cf)
                    except ValueError as e:
                        msg = "Your config file %s has invalid JSON\n%s" % (f, e.message)
                        raise dexy.commands.UserFeedback(msg)

                if json_dict.has_key("$reset"):
                    # Reset the config, i.e. ignore everything from parent
                    # directories, just use this directory's config in json_dict
                    config_dict = json_dict
                else:
                    # Combine any config in this dir with parent dir config.
                    config_dict.update(json_dict)

                if json_dict.has_key("$globals"):
                    global_args.update(json_dict["$globals"])

                if json_dict.has_key("$variables"):
                    variables.update(json_dict["$variables"])

        config_dict['$globals'] = global_args
        config_dict['$variables'] = variables
        return config_dict

    def load_config(self):
        """
        This method determines which subdirectories will be included in the
        dexy batch and populates the config dict for each of them.
        """
        if self.args['recurse']:

            # Figure out which directories need to be skipped
            exclude_at_root = Constants.EXCLUDE_DIRS_ROOT + self.reports_dirs + [self.args['artifactsdir'], self.args['logsdir']]
            self.log.debug("project root excluded directories %s" % ", ".join(exclude_at_root))

            exclude_everywhere = Constants.EXCLUDE_DIRS_ALL_LEVELS
            self.log.debug("directories excluded at all levels %s" % ", ".join(exclude_everywhere))

            for dirpath, dirnames, filenames in os.walk(self.args['directory']):
                # Figure out if we should process this directory and recurse
                # into its children. Start with process_dir = True
                process_dir = True

                # Remove any children we don't want to recurse into.
                if dirpath == ".":
                    for x in exclude_at_root:
                        if x in dirnames:
                            dirnames.remove(x)
                for x in exclude_everywhere:
                    if x in dirnames:
                        dirnames.remove(x)

                # Look for a .nodexy file
                if os.path.isfile(os.path.join(dirpath, '.nodexy')):
                    # If we find one...
                    self.log.info(".nodexy file found in %s" % dirpath)

                    # ...remove all child dirs from processing...
                    for i in xrange(len(dirnames)):
                        dirnames.pop()

                    # ...and skip this directory.
                    process_dir = False

                # Check if we match any excludes specified on the command line
                args_exclude = self.args['exclude']
                if isinstance(args_exclude, str):
                    args_exclude = args_exclude.split()
                for pattern in args_exclude:
                    for d in dirnames[:]:  # iterate over a copy since we remove from dirnames
                        m1 = re.match(pattern, d)
                        m2 = re.match("./%s" % pattern, d)
                        m3 = re.match("%s/" % pattern, d)
                        m4 = re.match("./%s/" % pattern, d)
                        if m1 or m2 or m3 or m4:
                            dirnames.remove(d)

                if process_dir:
                    self.config[dirpath] = self.config_for_directory(dirpath)
        else:
            # Not recursing
            dirpath = self.args['directory']
            self.config[dirpath] = self.config_for_directory(dirpath)

    def process_config(self):
        """
        Processes a populated config dict, identifies files to be processed,
        creates Document objects for each, links dependencies, and finally
        performs a topological sort to establish the order of the batch run.
        """

        # Define the parse_doc nested function which we will call recursively.
        def parse_doc(path, input_directive, args = {}):
            # If a specification is nested in a dependency, then input_directive
            # may be a dict. If so, split it into parts before continuing.
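            # e.g. an entry like {"data.csv|dexy": {"priority": 1}} (illustrative
            # values) arrives here as a one-item dict; popitem() yields the
            # directive string and its own args dict.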
            try:
                a, b = input_directive.popitem()
                input_directive = a
                args = b
            except AttributeError:
                pass

            tokens = input_directive.split("|")
            if "/" in tokens[0]:
                raise dexy.commands.UserFeedback("paths not allowed in tokens: %s" % tokens[0])
            if path == '.':
                glob_string = tokens[0]
            else:
                glob_string = os.path.join(re.sub("^\./", "", path), tokens[0])
            filters = tokens[1:]

            docs = []

            # virtual document
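            # An "@" in the glob marks a document with no file on disk, e.g.
            # "@summary.md|jinja" (illustrative name); the "@" is stripped
            # before globbing, and remote-fetch args (url/repo/path) require
            # the -danger option.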
            if re.search("@", glob_string):
                virtual = True
                dangerous = any(k in ['url', 'repo', 'path'] for k in args)
                if dangerous and not self.args['danger']:
                    msg = "You are attempting to access a remote file %s." % glob_string
                    msg += " You must specify -danger option to do this.\n"
                    raise dexy.commands.UserFeedback(msg)
                glob_string = glob_string.replace("@", "")
            else:
                virtual = False

            regex = fnmatch.translate(glob_string).replace(".*", "(.*)")
            matcher = re.compile(regex)
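            # fnmatch.translate("*.py") produces a regex roughly like
            # ".*\.py\Z(?ms)"; replacing ".*" with "(.*)" adds a capture group
            # so the root name can be recovered for "%" substitution in the
            # ifinput/ifnoinput directives below.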

            files = glob.glob(glob_string)

            nofiles = len(files) == 0

            if nofiles and virtual:
                files = [glob_string]

            for f in files:
                create = True
                if not virtual:
                    if os.path.isdir(f):
                        create = False

                if args.has_key('disabled'):
                    if args['disabled']:
                        create = False
                        self.log.warn("document %s|%s disabled" % (f, "|".join(filters)))

                inputs = []
                if args.has_key('inputs'):
                    if isinstance(args['inputs'], str) or isinstance(args['inputs'], unicode):
                        raise dexy.commands.UserFeedback("inputs for %s should be an array" % f)
                    for i in args['inputs']:
                        # Create document objects for input patterns (just in this directory)
                        for doc in parse_doc(path, i):
                            inputs.append(doc.key())


                m = matcher.match(f)
                if m and len(m.groups()) > 0:
                    rootname = m.group(1)

                # The 'ifinput' directive says that if an input exists matching
                # the specified pattern, we should create this document and it
                # will depend on the specified input.
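                # e.g. (illustrative values) a doc glob of "*.Rmd" with
                # "ifinput" : "%.R" means foo.Rmd is only created if an input
                # matching foo.R exists, and that input is added to its inputs.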
                if args.has_key('ifinput'):
                    if isinstance(args['ifinput'], str) or isinstance(args['ifinput'], unicode):
                        ifinputs = [args['ifinput']]
                    else:
                        self.log.debug("treating input %s as iterable. class: %s" % (
                            args['ifinput'], args['ifinput'].__class__.__name__))
                        ifinputs = args['ifinput']

                    for s in ifinputs:
                        self.log.debug("evaluating ifinput %s" % s)
                        ifinput = s.replace("%", rootname)
                        self.log.debug("evaluating ifinput %s" % ifinput)
                        input_docs = parse_doc(path, ifinput, {})
                        for input_doc in input_docs:
                            inputs.append(input_doc.key())

                    if len(input_docs) == 0:
                        create = False

                if args.has_key('ifnoinput'):
                    ifinput = args['ifnoinput'].replace("%", rootname)
                    input_docs = parse_doc(path, ifinput, {})

                    if len(input_docs) > 0:
                        create = False

                if args.has_key('except'):
                    try:
                        except_re = re.compile(args['except'])
                    except sre_constants.error as e:
                        raise dexy.commands.UserFeedback("""You passed 'except' value of %s.
Please pass a valid Python-style regular expression for
'except', NOT a glob-style matcher. Error message from
re.compile: %s""" % (args['except'], e))
                    if re.match(except_re, f):
                        self.log.warn("skipping %s for %s as it matches except pattern %s" % (
                                f,
                                input_directive,
                                args['except']
                                ))
                        create = False

                if create:
                    doc = dexy.document.Document()
                    doc.set_controller(self)

                    # Filters can either be included in the name...
                    doc.set_name_and_filters(f, filters)
                    # ...or they may be listed explicitly.
                    if args.has_key('filters'):
                        doc.filters += args['filters']

                    if args.has_key('loglevel'):
                        doc.loglevelname = args['loglevel']
                    doc.setup_log() # After name has been set
                    doc.virtual = virtual

                    key = doc.key()
                    self.log.debug("creating doc %s for glob %s" % (key, glob_string))

                    if self.members.has_key(key):
                        doc = self.members[key]

                    if args.has_key('priority'):
                        doc.priority = args['priority']
                        del args['priority']

                    doc.args.update(args)

                    if args.has_key('allinputs'):
                        doc.use_all_inputs = args['allinputs']

                    if args.has_key('inputs'):
                        doc.input_args = copy.copy(args['inputs'])
                        doc.input_keys = []

                    for i in inputs:
                        doc.add_input_key(i)

                    self.members[key] = doc
                    docs.append(doc) # docs is a local list of docs

            return docs # end of parse_doc nested function

        def get_pos(member):
            key = member.key()
            return self.members.keys().index(key)

        def depend(parent, child):
            self.depends.append((get_pos(child), get_pos(parent)))
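        # self.depends collects (input position, dependent position) pairs;
        # topsort() below uses these edges to order documents so that every
        # input is run before the documents that depend on it.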

        # The real processing starts here.
        self.members = OrderedDict()
        self.depends = []

        self.batch_id = self.db.next_batch_id()
        if not self.args['silent']:
            print "batch id is", self.batch_id

        for path, config in self.config.iteritems():
            ### @export "features-global-args-1"
            if config.has_key("$globals"):
                global_args = config["$globals"]
            else:
                global_args = {}

            if config.has_key("$variables"):
                global_variables = config["$variables"]
            else:
                global_variables = {}

            if self.args.has_key('globals'):
                global_args.update(self.args['globals'])

            for k, v in config.iteritems():
                local_args = global_args.copy()
                local_args.update(v)
                local_args['$variables'] = global_variables
                for kg in global_args.keys():
                    if local_args.has_key(kg):
                        if isinstance(local_args[kg], dict):
                            local_args[kg].update(global_args[kg])
                parse_doc(path, k, local_args)
            ### @end

        # Determine dependencies
        total_dependencies = 0
        self.log.debug("Finalizing dependencies between documents...")
        for doc in self.members.values():
            doc.finalize_inputs(self.members)
            total_dependencies += len(doc.inputs)
            for input_doc in doc.inputs:
                depend(doc, input_doc)

            self.log.debug("finalized dependencies for %s" % doc.key())
            if len(doc.inputs) > 10:
                self.log.debug("%s inputs added" % len(doc.inputs))
            elif len(doc.inputs) == 0:
                self.log.debug("no inputs added")
            else:
                self.log.debug("inputs added: %s" % ", ".join(d.key() for d in doc.inputs))

        if len(self.args['run']) > 0:
            # Only run the specified document, and its dependencies.
            new_members = OrderedDict()
            new_depends = []

            def new_get_pos(member):
                key = member.key()
                return new_members.keys().index(key)

            def new_depend(parent, child):
                new_depends.append((new_get_pos(child), new_get_pos(parent)))

            def parse_new_document(d):
                new_members[d.key()] = d
                for input_doc in d.inputs:
                    if not input_doc.key() in new_members.keys():
                        new_members[input_doc.key()] = input_doc
                    new_depend(d, input_doc)
                    parse_new_document(input_doc)

            run_key = self.args['run']
            if self.members.has_key(run_key):
                doc = self.members[run_key]
            else:
                matches = [k for k in self.members.keys() if k.startswith(run_key)]
                matches.sort(key=lambda k: len(self.members[k].inputs))
                doc = self.members[matches[-1]]
            parse_new_document(doc)

            if not self.args['silent']:
                print "limiting members list to %s and its dependencies, %s/%s documents will be run" % (doc.key(), len(new_members), len(self.members))
            self.members = new_members
            self.depends = new_depends

        num_members = len(self.members)
        if num_members > 0:
            dep_ratio = float(total_dependencies)/num_members
        else:
            dep_ratio = None

        if not self.args['silent']:
            print "sorting %s documents into run order, there are %s total dependencies" % (num_members, total_dependencies)
            if dep_ratio:
                print "ratio of dependencies to documents is %0.1f" % (dep_ratio)
                if dep_ratio > 10:
                    print "if you are experiencing performance problems:"
                    print "call dexy with -dryrun and inspect logs/batch-XXXX.json to debug dependencies"
                    print "consider using -strictinherit or reducing your use of 'allinputs' "

        try:
            self.log.debug("Beginning topological sort...")
            topsort_ordering = topsort(self.depends)
            self.log.debug("Topological sort completed successfully.")
        except CycleError as e:
            print "There are circular dependencies!"
            answer, num_parents, children = e.args
            for child, parents in children.items():
                for parent in parents:
                    print "%s depends on %s" % (self.members.keys()[parent], self.members.keys()[child])
            raise dexy.commands.UserFeedback(e.message)

        docs_without_dependencies = frozenset(range(len(self.members))) - frozenset(topsort_ordering)
        self.ordering = topsort_ordering + list(docs_without_dependencies)

        for i in self.ordering:
            key = self.members.keys()[i]
            self.docs.append(self.members[key])
Exemple #49
0
def group_all_export(request, group_slug):
    """
    Export all group members for a specific group
    """
    group = get_object_or_404(Group, slug=group_slug)

    # if they can edit it, they can export it
    if not has_perm(request.user,'user_groups.change_group', group):
        raise Http403

    import xlwt
    from ordereddict import OrderedDict
    from django.db import connection
    from forms_builder.forms.models import FieldEntry

    # create the excel book and sheet
    book = xlwt.Workbook(encoding='utf8')
    sheet = book.add_sheet('Group Members and Subscribers')
    
    #initialize indexes
    row_index = {}
    col_index = {}
    
    #---------
    # MEMBERS
    #---------
    # excel date styles
    default_style = xlwt.Style.default_style
    datetime_style = xlwt.easyxf(num_format_str='mm/dd/yyyy hh:mm')
    date_style = xlwt.easyxf(num_format_str='mm/dd/yyyy')
    
    # the key is what the column will be in the
    # excel sheet. the value is the database lookup
    # Used OrderedDict to maintain the column order
    group_mappings = OrderedDict([
        ('user_id', 'au.id'),
        ('first_name', 'au.first_name'),
        ('last_name', 'au.last_name'),
        ('email', 'au.email'),
        ('receives email', 'pp.direct_mail'),
        ('company', 'pp.company'),
        ('address', 'pp.address'),
        ('address2', 'pp.address2'),
        ('city', 'pp.city'),
        ('state', 'pp.state'),
        ('zipcode', 'pp.zipcode'),
        ('country', 'pp.country'),
        ('phone', 'pp.phone'),
        ('is_active', 'au.is_active'),
        ('date', 'gm.create_dt'),
    ])
    group_lookups = ','.join(group_mappings.values())
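    # group_lookups is now a comma-separated column list for the SELECT,
    # e.g. "au.id,au.first_name,au.last_name,...,gm.create_dt"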

    # Use custom SQL to fetch the rows because we need to include the
    # user profile information, and Django's ORM cannot do that without
    # calling get_profile() for each user. This way pulling 13,000 group
    # members is still a single query, instead of 13,000 individual
    # profile queries.
    cursor = connection.cursor()
    sql = "SELECT %s FROM user_groups_groupmembership gm \
           INNER JOIN auth_user au ON (au.id = gm.member_id) \
           LEFT OUTER JOIN profiles_profile pp \
           on (pp.user_id = gm.member_id) WHERE group_id = %%s;"
    sql =  sql % group_lookups
    cursor.execute(sql, [group.pk])
    values_list = list(cursor.fetchall())

    # index the group key mappings and insert them into the sheet.
    for key in group_mappings.keys():
        if not key in col_index:
            col = len(col_index.keys())
            col_index[key] = col
            sheet.write(0, col, key, style=default_style)

    if values_list:
        # Write the data enumerated to the excel sheet
        for row, row_data in enumerate(values_list):
            for col, val in enumerate(row_data):
                
                if not row in row_index:
                    # assign the row if it is not yet available
                    row_index[row] = row + 1
                
                # styles the date/time fields
                if isinstance(val, datetime):
                    style = datetime_style
                elif isinstance(val, date):
                    style = date_style
                else:
                    style = default_style
                
                sheet.write(row + 1, col, val, style=style)
    
    #-------------
    # Subscribers
    #-------------
    entries = FieldEntry.objects.filter(entry__subscriptions__group=group).distinct()
    
    for entry in entries:
        val = entry.value
        field = entry.field.label.lower().replace(" ", "_")
        
        if "subscriber %s" % str(entry.entry.pk) in row_index:
            # get the subscriber's row number
            row = row_index["subscriber %s" % str(entry.entry.pk)]
        else:
            # assign the row if it is not yet available
            row = len(row_index.keys()) + 1
            row_index["subscriber %s" % str(entry.entry.pk)] = row
        
        if field in col_index:
            # get the entry's col number
            col = col_index[field]
        else:
            # assign the col if it is not yet available
            # and label the new column
            col = len(col_index.keys())
            col_index[field] = col
            sheet.write(0, col, field, style=default_style)
            
        # styles the date/time fields
        if isinstance(val, datetime):
            style = datetime_style
        elif isinstance(val, date):
            style = date_style
        else:
            style = default_style
        
        sheet.write(row, col, val, style=style)

    response = HttpResponse(mimetype='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename=group_%s_all_export.xls' % group.pk
    book.save(response)
    return response
    def build_dict(self, res):
        d = OrderedDict()
        for i in res:
            if i['d'] not in d.keys():
                d[i['d']] = i['ex']
        return d
Exemple #51
0
class Artifact(object):
    HASH_WHITELIST = Constants.ARTIFACT_HASH_WHITELIST
    MAX_DATA_DICT_DECIMALS = 5
    MAX_DATA_DICT_LENGTH = 10 ** MAX_DATA_DICT_DECIMALS
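    # With 5 decimals the numbered keys produced by
    # convert_data_dict_to_numbered_dict look like "00000:sectionname",
    # so a data dict may hold at most 10**5 sections.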
    META_ATTRS = [
        'additional_inputs',
        'binary_input',
        'binary_output',
        'created_by',
        'document_key',
        'ext',
        'final',
        'hashfunction',
        'initial',
        'logstream',
        'key',
        'name',
        'output_hash',
        'state',
        'stdout',
        'virtual'
    ]

    BINARY_EXTENSIONS = [
        '.docx',
        '.eot',
        '.epub',
        '.gif',
        '.gz',
        '.jpg',
        '.kch',
        '.odt',
        '.pdf',
        '.png',
        '.rtf',
        '.sqlite',
        '.sqlite3',
        '.swf',
        '.tgz',
        '.ttf',
        '.wav',
        '.woff',
        '.xls',
        '.zip'
    ]

    def __init__(self):
        if not hasattr(self.__class__, 'FILTERS'):
            self.__class__.FILTERS = dexy.introspect.filters(Constants.NULL_LOGGER)

        self._inputs = {}
        self.additional = None
        self.additional_inputs = []
        self.args = {}
        self.args['globals'] = {}
        self.artifacts_dir = 'artifacts' # TODO don't hard code
        self.batch_id = None
        self.batch_order = None
        self.binary_input = None
        self.binary_output = None
        self.controller_args = {}
        self.controller_args['globals'] = {}
        self.created_by = None
        self.ctime = None
        self.data_dict = OrderedDict()
        self.dexy_version = Version.VERSION
        self.dirty = False
        self.document_key = None
        self.elapsed = 0
        self.ext = None
        self.final = None
        self.finish_time = None
        self.hashfunction = 'md5'
        self.initial = None
        self.inode = None
        self.input_data_dict = OrderedDict()
        self.is_last = False
        self.key = None
        self.log = logging.getLogger()
        self.logstream = ""
        self.mtime = None
        self.name = None
        self.source = None
        self.start_time = None
        self.state = 'new'
        self.stdout = None
        self.virtual_docs = None

    def keys(self):
        return self.data_dict.keys()

    def may_have_kv_storage(self):
        return self.binary_output and (self.ext in dexy.helpers.KeyValueData.EXTENSIONS)

    def __getitem__(self, key):
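        # Lookup order: key-value side storage (when this artifact has it),
        # then a named section in data_dict, then an attribute of this
        # artifact; if none of these match, raise UserFeedback.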
        if not hasattr(self, "_storage") and self.binary_output and (self.ext in dexy.helpers.KeyValueData.EXTENSIONS):
            self.setup_kv_storage()

        if hasattr(self, "_storage"):
            if self._storage.mode == "write":
                # Change from write mode to read mode...
                self.setup_kv_storage()
            return self._storage.retrieve(key)
        elif self.data_dict.has_key(key):
            return self.data_dict[key]
        elif hasattr(self, key):
            return getattr(self, key)
        elif self.ext in dexy.helpers.KeyValueData.EXTENSIONS:
            self.setup_kv_storage()
            return self._storage.retrieve(key)
        else:
            raise dexy.commands.UserFeedback("Can't find key '%s' in %s" % (key, self.key))

    def __unicode__(self):
        """
        When d[key] is used without any attribute being accessed, we need to
        return the artifact's output text.

        Jinja calls the __unicode__ method, so we override it here.
        """
        return self.output_text()

    def is_complete(self):
        return str(self.state) == 'complete'

    @classmethod
    def retrieve(klass, hashstring, hashfunction='md5'):
        if not hasattr(klass, 'retrieved_artifacts'):
            klass.retrieved_artifacts = {}
        if klass.retrieved_artifacts.has_key(hashstring):
            return klass.retrieved_artifacts[hashstring]
        else:
            artifact = klass()
            artifact.hashstring = hashstring
            artifact.hashfunction = hashfunction
            artifact.load()
            klass.retrieved_artifacts[hashstring] = artifact
            return artifact

    def load(self):
        self.load_meta()
        self.load_input()
        if self.is_complete() and not self.is_loaded():
            self.load_output()

    def load_inputs(self):
        for a in self.inputs():
            a.load()

    def save(self):
        if self.is_abstract():
            pass # For testing.
        elif not self.hashstring:
            raise Exception("can't persist an artifact without a hashstring!")
        else:
            self.save_meta()
            if self.is_complete() and not self.is_output_cached():
                try:
                    self.save_output()
                except IOError as e:
                    print "An error occured while saving %s" % self.key
                    raise e

    def is_abstract(self):
        return not hasattr(self, 'save_meta')

    def filter_args(self):
        """
        Returns args specified in the .dexy file for this filter alias.
        """
        args = {}
        for a in self.filter_class.ALIASES:
            if self.args.has_key(a):
                try:
                    args.update(self.args[a])
                except ValueError as e:
                    if "dictionary update sequence element" in e.message:
                        raise dexy.commands.UserFeedback("You need to supply a dict to argument '%s', rather than the single value '%s'" % (a, self.args[a]))
                    else:
                        print self.args[a]
                        raise e
        return args

    def setup_initial(self):
        """
        Set up an initial artifact (the first artifact in a document's filter chain).
        """
        if self.args.has_key('binary'):
            self.binary_input = self.args['binary']
        else:
            self.binary_input = (self.doc.ext in self.BINARY_EXTENSIONS)

        self.binary_output = self.binary_input
        self.ext = self.doc.ext
        self.initial = True
        self.virtual = self.doc.virtual
        self.virtual_docs = self.doc.virtual_docs

        if self.args.has_key('final'):
            self.final = self.args['final']
        elif os.path.basename(self.name).startswith("_"):
            self.final = False

        if not self.doc.virtual:
            stat_info = os.stat(self.name)
            self.ctime = stat_info[stat.ST_CTIME]
            self.mtime = stat_info[stat.ST_MTIME]
            self.inode = stat_info[stat.ST_INO]

        self.set_data(self.doc.initial_artifact_data())

        # TODO remove?
        if not self.data_dict:
            raise Exception("no data dict!")
        elif len(self.data_dict) == 0:
            raise Exception("data dict has len 0!")

        self.state = 'complete'

    def setup_from_filter_class(self):
        # cache filter class source code so it only has to be calculated once
        filter_class_source_const = "SOURCE_CODE_%s" % self.filter_class.__name__
        if not hasattr(self.filter_class, filter_class_source_const):
            # get source code of this filter class + all parent filter classes.
            source = ""
            klass = self.filter_class

            # get source code from filter class and all parent classes
            while klass != dexy.dexy_filter.DexyFilter:
                source += inspect.getsource(klass)
                klass = klass.__base__

            # and then get source code of DexyFilter class
            source += inspect.getsource(dexy.dexy_filter.DexyFilter)

            filter_class_source_hash = self.compute_hash(source)
            setattr(self.filter_class, filter_class_source_const, filter_class_source_hash)
            assert filter_class_source_hash == getattr(self.filter_class, filter_class_source_const)
            self.log.debug("Source code hash for %s is %s" % (self.filter_class.__name__, filter_class_source_hash))

        if not hasattr(self.filter_class, 'VERSION'):
            filter_version = self.filter_class.version(self.log)
            self.filter_class.VERSION = filter_version

        self.filter_name = self.filter_class.__name__
        self.filter_source = getattr(self.filter_class, filter_class_source_const)
        self.filter_version = self.filter_class.VERSION

        if self.final is None:
            self.final = self.filter_class.FINAL

    def setup_from_previous_artifact(self, previous_artifact):
        for a in ['args', 'final', 'mtime', 'ctime', 'inode', 'virtual', 'virtual_docs']:
            setattr(self, a, getattr(previous_artifact, a))

        # Look for additional inputs in previous artifacts or previous
        # artifacts' inputs.
        for k, a in previous_artifact.inputs().iteritems():
            if a.additional and not k in self._inputs:
                self.add_input(k, a)
            elif not k in self._inputs and not a.virtual:
                # We should have all other inputs already. Validate this.
                raise Exception("Missing input %s" % k)

            for kk, aa in a.inputs().iteritems():
                if aa.additional and not kk in self._inputs:
                    self.add_input(kk, aa)

        self.binary_input = previous_artifact.binary_output
        self.input_data_dict = previous_artifact.data_dict
        self.input_ext = previous_artifact.ext
        self.previous_artifact_hashstring = previous_artifact.hashstring
        self.previous_artifact_filename = previous_artifact.filename()
        self.previous_artifact_filepath = previous_artifact.filepath()
        self.previous_canonical_filename = previous_artifact.canonical_filename(True)
        self.previous_long_canonical_filename = previous_artifact.long_canonical_filename()
        self.previous_websafe_key = previous_artifact.websafe_key()

        # The JSON output of previous artifact
        if not previous_artifact.binary_output:
            self.previous_cached_output_filepath = previous_artifact.cached_output_filepath()

        # Determine file extension of output
        if hasattr(self, 'next_filter_class'):
            next_inputs = self.next_filter_class.INPUT_EXTENSIONS
        else:
            next_inputs = None

        if self.filter_args().has_key('ext'):
            ext = self.filter_args()['ext']
            if not ext.startswith("."):
                ext = ".%s" % ext
            self.ext = ext
        else:
            self.ext = self.filter_class.output_file_extension(
                    previous_artifact.ext,
                    self.name,
                    next_inputs)

        self.binary_output = self.filter_class.BINARY
        if self.binary_output is None:
            self.set_binary_from_ext()

        self.state = 'setup'

    @classmethod
    def setup(klass, doc, artifact_key, filter_class = None, previous_artifact = None):
        """
        Create an Artifact instance and load all information needed to
        calculate its hashstring.
        """
        artifact = klass()
        artifact.key = artifact_key
        artifact.filter_class = filter_class
        artifact.is_last = (artifact.key == doc.key())

        # Add references for convenience
        artifact.artifacts_dir = doc.artifacts_dir
        artifact.controller_args = doc.controller.args
        artifact.hashfunction = doc.controller.args['hashfunction']
        artifact.db = doc.db
        artifact.doc = doc
        artifact.log = doc.log

        # These attributes are the same for all artifacts pertaining to a document
        artifact.args = doc.args
        artifact.batch_id = doc.batch_id
        artifact.document_key = doc.key()
        artifact.name = doc.name

        # Set batch order to next in sequence
        artifact.batch_order = artifact.db.next_batch_order(artifact.batch_id)

        next_filter_class = doc.next_filter_class()
        if next_filter_class:
            artifact.next_filter_name = next_filter_class.__name__
            artifact.next_filter_class = next_filter_class

        # Set inputs from original document inputs.
        artifact._inputs.update(artifact.doc.input_artifacts())
        if len(artifact.doc.input_artifacts().keys()) > 10:
            doc.log.debug("Setting inputs to include %s document inputs" % len(artifact.doc.input_artifacts()))
        elif len(artifact.doc.input_artifacts().keys()) > 0:
            doc.log.debug("Setting inputs to include inputs: %s" % ",".join(artifact.doc.input_artifacts().keys()))

        for k, a in artifact.doc.input_artifacts().iteritems():
            if a.additional and not k in artifact._inputs:
                doc.log.debug("Adding additional input %s" % k)
                artifact.add_input(k, a)

            for kk, aa in a.inputs().iteritems():
                if aa.additional and not kk in artifact._inputs:
                    doc.log.debug("Adding additional input %s" % kk)
                    artifact.add_input(kk, aa)

        if previous_artifact:
            artifact.setup_from_previous_artifact(previous_artifact)
            artifact.setup_from_filter_class()
        else:
            artifact.setup_initial()

        artifact.set_hashstring()

        return artifact

    def run(self):
        start = time.time()

        if self.controller_args['nocache'] or not self.is_complete():
            # We have to actually run things...
            if not self.filter_class:
                self.filter_class = dexy.introspect.get_filter_by_name(self.filter_name, self.doc.__class__.filter_list)

            # Set up instance of filter.
            filter_instance = self.filter_class()
            filter_instance.artifact = self
            filter_instance.log = self.log

            # Make sure previous artifact is loaded.
            if not self.binary_input and len(self.input_text()) == 0:
                f = open(self.previous_artifact_filepath, "rb")
                self.data_dict['1'] = f.read()
                f.close()

            try:
                filter_instance.process()
            except dexy.commands.UserFeedback as e:
                messages = []
                err_msg_args = (self.doc.key(), self.filter_alias, self.doc.step, len(self.doc.filters))
                messages.append("ERROR in %s (in filter '%s' - step %s of %s)" % err_msg_args)
                messages.append(e.message)

                for message in messages:
                    self.log.debug(message)

                messages.append("This exception information has been written to logs/dexy.log")
                messages.append("There may be more information in logs/dexy.log")
                if self.log.getEffectiveLevel() > logging.DEBUG:
                    messages.append("If you can't find clues in the log, try running again with -loglevel DEBUG")

                raise dexy.commands.UserFeedback("\n".join(messages))

            except dexy.commands.InternalDexyProblem as e:
                err_msg_args = (self.doc.key(), self.filter_alias, self.doc.step, len(self.doc.filters))
                sys.stderr.write("ERROR in %s (in filter '%s' - step %s of %s)\n" % err_msg_args)
                raise e
            except Exception as e:
                traceback.print_tb(sys.exc_info()[2])
                err_msg_args = (self.doc.key(), self.filter_alias, self.doc.step, len(self.doc.filters))
                sys.stderr.write("ERROR in %s (in filter '%s' - step %s of %s)\n" % err_msg_args)
                if e.message:
                    raise dexy.commands.InternalDexyProblem("error class: %s\nerror message: %s" % (e.__class__.__name__, e.message))
                else:
                    raise dexy.commands.InternalDexyProblem("error class: %s" % e.__class__.__name__)

            if self.data_dict and len(self.data_dict) > 0:
                pass

            elif self.is_canonical_output_cached():
                self.state = 'complete'
                self.save()

            else:
                raise Exception("data neither in memory nor on disk")

            self.logstream = self.doc.logstream.getvalue()
            self.state = 'complete'
            self.source = 'run'
            self.save()
        else:
            self.source = 'cache'
            self.log.debug("using cached artifact for %s" % self.key)

            # make sure additional artifacts are added to db
            for a in self.inputs().values():
                if a.additional and not a.key in self.db.extra_keys:
                    a.batch_id = self.batch_id
                    self.db.append_artifact(a)

        self.elapsed = time.time() - start
        self.db.update_artifact(self)

    def add_additional_artifact(self, key_with_ext, ext=None):
        if not ext:
            ext = os.path.splitext(key_with_ext)[1]
        new_artifact = self.__class__()
        new_artifact.key = key_with_ext
        if ext.startswith("."):
            new_artifact.ext = ext
        else:
            new_artifact.ext = ".%s" % ext
        new_artifact.final = True
        new_artifact.hashfunction = self.hashfunction
        new_artifact.additional = True
        new_artifact.set_binary_from_ext()
        new_artifact.artifacts_dir = self.artifacts_dir
        new_artifact.inode = self.hashstring
        new_artifact.created_by = self.key
        new_artifact.virtual = True
        new_artifact.name = key_with_ext.split("|")[0]

        # TODO this is duplicated in setup_from_previous_artifact, should reorganize
        for at in ['batch_id', 'document_key', 'mtime', 'ctime', 'virtual_docs']:
            val = getattr(self, at)
            setattr(new_artifact, at, val)

        new_artifact.set_hashstring()
        self.log.debug("new artifact %s hashstring %s" % (key_with_ext, new_artifact.hashstring))
        self.add_input(key_with_ext, new_artifact)
        self.db.append_artifact(new_artifact) # append to db because not part of doc.artifacts
        return new_artifact

    def add_input(self, key, artifact):
        self._inputs[key] = artifact
        self.additional_inputs.append(artifact.hashstring)

    def inputs(self):
        return self._inputs

    def set_binary_from_ext(self):
        # TODO list more binary extensions or find better way to do this
        if self.ext in self.BINARY_EXTENSIONS:
            self.binary_output = True
        else:
            self.binary_output = False

    def set_data(self, data):
        self.data_dict['1'] = data

    def set_data_from_artifact(self):
        f = codecs.open(self.filepath(), "r", encoding="utf-8")
        self.data_dict['1'] = f.read()

    def is_loaded(self):
        return hasattr(self, 'data_dict') and len(self.data_dict) > 0

    def compute_hash(self, text):
        unicode_text = None

        if type(text) == unicode:
            unicode_text = text
        elif type(text) in [dict, list]:
            unicode_text = json.dumps(text)
        elif self.binary_input:
            pass
        else:
            unicode_text = unicode(text, encoding="utf-8")

        if unicode_text:
            text = unicode_text.encode("utf-8")

        if self.hashfunction == 'md5':
            h = hashlib.md5(text).hexdigest()

        elif self.hashfunction == 'sha1':
            h = hashlib.sha1(text).hexdigest()

        elif self.hashfunction == 'sha224':
            h = hashlib.sha224(text).hexdigest()

        elif self.hashfunction == 'sha256':
            h = hashlib.sha256(text).hexdigest()

        elif self.hashfunction == 'sha384':
            h = hashlib.sha384(text).hexdigest()

        elif self.hashfunction == 'sha512':
            h = hashlib.sha512(text).hexdigest()

        elif self.hashfunction == 'crc32':
            h = str(zlib.crc32(text) & 0xffffffff)

        elif self.hashfunction == 'adler32':
            h = str(zlib.adler32(text) & 0xffffffff)

        else:
            raise Exception("unexpected hash function %s" % self.hashfunction)

        return h

    def input_hashes(self):
        """
        Returns an OrderedDict of key, hashstring for each input artifact, sorted by key.
        """
        return OrderedDict((k, str(self.inputs()[k].hashstring)) for k in sorted(self.inputs()))

    def hash_dict(self):
        """
        Calculate and cache the elements used to compute the hashstring
        """
        if not hasattr(self.__class__, 'SOURCE_CODE'):
            artifact_class_source = inspect.getsource(self.__class__)
            artifact_py_source = inspect.getsource(Artifact)
            self.__class__.SOURCE_CODE = self.compute_hash(artifact_class_source + artifact_py_source)

        self.artifact_class_source = self.__class__.SOURCE_CODE

        if self.dirty:
            self.dirty_string = time.gmtime()

        hash_dict = OrderedDict()

        hash_dict['inputs'] = self.input_hashes()

        for k in self.HASH_WHITELIST:
            if self.__dict__.has_key(k):
                v = self.__dict__[k]
                if hasattr(v, 'items'):
                    hash_v = OrderedDict()
                    for k1 in sorted(v.keys()):
                        v1 = v[k1]
                        try:
                            if len(str(v1)) > 50:
                                raise Exception()
                            json.dumps(v1)
                            hash_v[str(k1)] = v1
                        except Exception:
                            # Use a hash if we will have problems saving to JSON
                            # or if the data is large (don't want to clutter up the DB,
                            # makes it harder to spot differences)
                            hash_v[str(k1)] = self.compute_hash(v1)
                else:
                    hash_v = str(v)
                hash_dict[str(k)] = hash_v
        return hash_dict

    def set_hashstring(self):
        if hasattr(self, 'hashstring'):
            raise Exception("setting hashstring twice")

        hash_data = str(self.hash_dict())
        self.hashstring = self.compute_hash(hash_data)

        try:
            original_document_key = self.document_key
            if not self.is_loaded():
                self.load()
            self.document_key = original_document_key
        except AttributeError as e:
            if not self.is_abstract():
                raise e
        except IOError as e:
            self.save_meta()

    def convert_if_not_unicode(self, s):
        if type(s) == unicode:
            return s
        elif s is None:
            return u""
        else:
            try:
                ut = unicode(s, encoding="utf-8")
                return ut
            except Exception as e:
                print "error occurred trying to convert text to unicode in", self.key
                raise e


    def input_text(self):
        return u"".join([self.convert_if_not_unicode(v) for k, v in self.input_data_dict.items()])

    def output_text(self):
        return u"".join([self.convert_if_not_unicode(v) for k, v in self.data_dict.items()])

    def read_binary_output(self):
        self.binary_output = True
        self.load_output()
        self.binary_output = False
        return self.binary_data

    def output(self):
        if not self.is_complete():
            raise Exception("can't call output unless complete!")

        if self.binary_output:
            if not hasattr(self, 'binary_data'):
                self.load_output()
            return self.binary_data
        else:
            return self.output_text()

    def relative_refs(self, relative_to_file):
        """How to refer to this artifact, relative to another."""

        doc_dir = os.path.dirname(relative_to_file)
        return [
                os.path.relpath(self.key, doc_dir),
                os.path.relpath(self.long_canonical_filename(), doc_dir),
                "/%s" % self.key,
                "/%s" % self.long_canonical_filename()
        ]
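        # e.g. (illustrative) for key "docs/diagram.png" relative to
        # "docs/index.html" this returns ["diagram.png", "diagram.png",
        # "/docs/diagram.png", "/docs/diagram.png"].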

    def use_canonical_filename(self):
        """Returns the canonical filename after saving contents under this name
        in the artifacts directory."""
        self.write_to_file(os.path.join(self.artifacts_dir,
                                        self.canonical_filename()))
        return self.canonical_filename()

    def write_to_file(self, filename):
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname) and not dirname == '':
            os.makedirs(dirname)
        shutil.copyfile(self.filepath(), filename)

    def work_filename(self):
        return "%s.work%s" % (self.hashstring, self.input_ext)

    def generate_workfile(self, work_filename = None):
        if not work_filename:
            work_filename = self.work_filename()
        work_path = os.path.join(self.artifacts_dir, work_filename)
        work_file = codecs.open(work_path, "w", encoding="utf-8")
        work_file.write(self.input_text())
        work_file.close()

    def temp_filename(self, ext):
        return "%s.work%s" % (self.hashstring, ext)

    def open_tempfile(self, ext):
        tempfile_path = os.path.join(self.artifacts_dir, self.temp_filename(ext))
        codecs.open(tempfile_path, "w", encoding="utf-8")

    def temp_dir(self):
        return os.path.join(self.artifacts_dir, self.hashstring)

    def create_temp_dir(self, populate=False):
        tempdir = self.temp_dir()
        shutil.rmtree(tempdir, ignore_errors=True)
        os.mkdir(tempdir)

        if populate:
            # write all inputs to this directory, under their canonical names
            for input_artifact in self._inputs.values():
                filename = os.path.join(tempdir, input_artifact.canonical_filename())
                if os.path.exists(input_artifact.filepath()):
                    input_artifact.write_to_file(filename)
                    self.log.debug("Populating temp dir for %s with %s" % (self.key, filename))
                else:
                    self.log.warn("Not populating temp dir for %s with file %s, file does not exist (yet)" % (self.key, filename))

            # write the workfile to this directory under its canonical name
            previous = self.previous_artifact_filepath
            workfile = os.path.join(tempdir, self.previous_canonical_filename)
            if not os.path.exists(os.path.dirname(workfile)):
                os.makedirs(os.path.dirname(workfile))
            self.log.debug("Copying %s to %s" % (previous, workfile))
            shutil.copyfile(previous, workfile)

    def alias(self):
        """
        Returns the first filter alias (a key component starting with "-") if
        this artifact's key includes one, otherwise None.
        """
        aliases = [k for k in self.key.split("|") if k.startswith("-")]
        if len(aliases) > 0:
            return aliases[0]

    def canonical_dir(self, ignore_args = False):
        return os.path.dirname(self.name)

    def canonical_basename(self, ignore_args = False):
        return os.path.basename(self.canonical_filename(ignore_args))

    def canonical_filename(self, ignore_args = False):
        fn = os.path.splitext(self.key.split("|")[0])[0]

        if self.args.has_key('canonical-name') and not ignore_args:
            parent_dir = os.path.dirname(fn)
            return os.path.join(parent_dir, self.args['canonical-name'])
        elif self.args.has_key('postfix') and not ignore_args:
            return "%s%s%s" % (fn, self.ext, self.args['postfix'])
        elif self.alias():
            return "%s%s%s" % (fn, self.alias(), self.ext)
        else:
            return "%s%s" % (fn, self.ext)

    def long_canonical_filename(self):
        if not "|" in self.key:
            return self.key.replace("|", "-")
        else:
            return "%s%s" % (self.key.replace("|", "-"), self.ext)

    def websafe_key(self):
        return self.long_canonical_filename().replace("/", "--")

    def web_safe_document_key(self):
        # duplicate, remove this alias
        return self.websafe_key()

    def filename(self):
        """
        The filename where artifact content is stored, based on the hashstring.
        """
        if not hasattr(self, 'ext'):
            raise Exception("artifact %s has no ext" % self.key)
        return "%s%s" % (self.hashstring, self.ext)

    def filepath(self):
        """
        Full path (including artifacts dir location) to location where artifact content is stored.
        """
        return os.path.join(self.artifacts_dir, self.filename())

    def abs_filepath(self):
        return os.path.abspath(self.filepath())

    def breadcrumbs(self):
        """A list of parent dirs, plus the filename if it's not 'index.html'."""
        parent_dirs = os.path.dirname(self.canonical_filename()).split("/")

        if self.canonical_basename() == "index.html":
            result = parent_dirs
        else:
            # list.append() returns None, so build the list explicitly
            result = parent_dirs + [self.canonical_basename()]

        if not result:
            result = []

        return result

    def titleized_name(self):
        if self.canonical_basename() == "index.html":
            return self.breadcrumbs()[-1].replace("-"," ").title()
        else:
            return os.path.splitext(self.canonical_basename())[0].replace("-"," ").title()

    def unique_key(self):
        return "%s:%s:%s" % (self.batch_id, self.document_key, self.key)

    def websafe_unique_key(self):
        return self.unique_key().replace("/", "--")

    def url(self):
        # TODO test for final
        return "/%s" % self.canonical_filename()

    def hyperlink(self, link_text = None):
        # TODO test for final
        if not link_text:
            link_text = self.canonical_basename()

        return """<a href="%s">%s</a>""" % (self.url(), link_text)

    def iframe(self, link_text = None, width = "600px", height = "300px"):
        # TODO test for final
        args = {
                'url' : self.url(),
                'hyperlink' : self.hyperlink(link_text),
                'width' : width,
                'height' : height
        }

        return """
<iframe src="%(url)s" width="%(width)s" height="%(height)s" style="border: thin solid gray;">
%(hyperlink)s
</iframe>
        """ % args

    def img(self):
        # TODO test for final
        return """<img src="/%s" alt="Image generated by dexy %s" />""" % (self.canonical_filename(), self.key)

    def relpath(self, artifact_key):
        """
        Returns the relative path from this artifact to another artifact's key, e.g. for linking to a CSS file with a relative path.
        """
        artifact = self.inputs()[artifact_key]
        return os.path.join(self.relative_path_to_input(artifact), artifact.canonical_basename())

    def has_sections(self):
        return (self.data_dict.keys() != ['1'])

    def relative_path_to_input(self, input_artifact):
        my_dir = os.path.dirname(self.name)
        input_dir = os.path.dirname(input_artifact.name)
        self.log.debug("Calculating relative path between %s and %s" % (self.name, input_artifact.name))

        if not my_dir:
            my_dir = "."
        if not input_dir:
            input_dir = "."

        if my_dir == input_dir:
            relpath = ""
        else:
            relpath = os.path.relpath(input_dir, my_dir)
        return relpath

    def relative_key_for_input(self, input_artifact):
        relpath = self.relative_path_to_input(input_artifact)
        return os.path.join(relpath, os.path.basename(input_artifact.key))

    def convert_numbered_dict_to_ordered_dict(self, numbered_dict):
        ordered_dict = OrderedDict()
        for x in sorted(numbered_dict.keys()):
            k = x.split(":", 1)[1]
            ordered_dict[k] = numbered_dict[x]
        return ordered_dict

    def convert_data_dict_to_numbered_dict(self):
        if len(self.data_dict) >= self.MAX_DATA_DICT_LENGTH:
            exception_msg = """Your data dict has %s items, which is greater than the arbitrary limit of %s items.
            You can increase this limit by changing MAX_DATA_DICT_DECIMALS."""
            raise Exception(exception_msg % (len(self.data_dict), self.MAX_DATA_DICT_LENGTH))

        data_dict = {}
        i = -1
        for k, v in self.data_dict.iteritems():
            i += 1
            fmt = "%%0%sd:%%s" % self.MAX_DATA_DICT_DECIMALS
            data_dict[fmt % (i, k)] = v
        return data_dict

    def storage(self, reset=False):
        if not hasattr(self, "_storage") or reset:
            # Assume we want KV storage
            self.setup_kv_storage()
        return self._storage

    def key_prefixes(self):
        return sorted(set(":".join(k.split(":")[:-1]) for k in self.storage().keys()))

    def kv_storage(self):
        return self.storage()

    def row_storage(self):
        if not hasattr(self, "_storage"):
            self.setup_row()
        return self._storage

    def setup_kv_storage(self):
        try:
            self._storage = dexy.helpers.KeyValueData(self.filepath())
        except ValueError as e:
            raise dexy.commands.UserFeedback("Can't get key-value data from %s for %s: %s" % self.filepath(), self.key, e.message)

    def setup_row_storage(self):
        self._storage = dexy.helpers.RowData(self.filepath())
class ObservationRows:
    """Store index file information.

       The ObservationRows class defines a structure to get specific
       information about the spectra out of the index file which was
       produced by the sdfits filler program.

       This is essentially a table of the raw SDFITS file rows, organized
       with a lookup key of scan/feed/window/polarization.

       When rows are added to this object (addRow), the FITS extension,
       row of the FITS table and scan type are stored.

       A list of rows for each scan/feed/window/polarization can be
       retrieved with the 'get' method.

    """
    def __init__(self):
        self.rows = OrderedDict()
        self.Key = namedtuple('key', 'scan, feed, window, polarization')

    def __repr__(self):
        return ('Scans: {0}\nFeeds: {1}\nWindows: {2}\nPols: {3}'.format(
            self.scans(), self.feeds(), self.windows(), self.pols()))

    def addRow(self, scan, feed, window, polarization, fitsExtension,
               rowOfFitsFile, obsid, procname, procscan, nchans):
        """Add rows to the ObservationRows object.

           When rows are added to this object (addRow), the FITS extension,
           row of the FITS table and scan type are stored.

        """

        key = self.Key(scan, feed, window, polarization)

        if key in self.rows:
            self.rows[key]['ROW'].append(rowOfFitsFile)
        else:
            self.rows[key] = {
                'EXTENSION': fitsExtension,
                'ROW': [rowOfFitsFile],
                'OBSID': obsid,
                'PROCNAME': procname,
                'PROCSCAN': procscan,
                'NCHANS': nchans
            }

    def get(self, scan, feed, window, polarization):
        """Retreive a list of rows for scan/feed/win/pol.

        """
        try:
            key = (scan, feed, window, polarization)
            return self.rows[key]
        except (KeyError):
            raise

    def scans(self):
        """Return a list of scans in the observation.

        """
        return sorted(list(set([xx.scan for xx in self.rows.keys()])))

    def feeds(self):
        """Return a list of feeds in the observation.

        """
        return list(set([xx.feed for xx in self.rows.keys()]))

    def windows(self):
        """Return a list of windows in the observation.

        """
        return list(set([xx.window for xx in self.rows.keys()]))

    def pols(self):
        """Return a list of polarizations in the observation.

        """
        return list(set([xx.polarization for xx in self.rows.keys()]))
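
# A short usage sketch for ObservationRows (the scan/feed/window/pol values below
# are hypothetical, not taken from a real SDFITS index file; it assumes the
# OrderedDict and namedtuple imports the original module provides). Two rows added
# under the same key accumulate in the 'ROW' list and come back together via get().
obs = ObservationRows()
obs.addRow(scan=10, feed=0, window=0, polarization='LL',
           fitsExtension=1, rowOfFitsFile=5, obsid='W3',
           procname='Track', procscan='ON', nchans=32768)
obs.addRow(scan=10, feed=0, window=0, polarization='LL',
           fitsExtension=1, rowOfFitsFile=6, obsid='W3',
           procname='Track', procscan='ON', nchans=32768)

rec = obs.get(10, 0, 0, 'LL')
assert rec['ROW'] == [5, 6]   # both FITS table rows collected under one key
assert obs.scans() == [10]
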
Exemple #53
0
class Artifact(object):
    HASH_WHITELIST = Constants.ARTIFACT_HASH_WHITELIST
    META_ATTRS = [
        'additional',
        'binary_input',
        'binary_output',
        'created_by',
        'document_key',
        'ext',
        'final',
        'hashfunction',
        'initial',
        'is_last',
        'logstream',
        'key',
        'name',
        'output_hash',
        'state',
        'stdout'
    ]

    BINARY_EXTENSIONS = [
        '.gif',
        '.jpg',
        '.png',
        '.pdf',
        '.zip',
        '.tgz',
        '.gz',
        '.eot',
        '.ttf',
        '.woff',
        '.sqlite',
        '.sqlite3',
        '.swf'
    ]

    def __init__(self):
        if not hasattr(self.__class__, 'FILTERS'):
            self.__class__.FILTERS = dexy.introspect.filters(Constants.NULL_LOGGER)

        self._inputs = {}
        self.additional = None
        self.args = {}
        self.args['globals'] = {}
        self.artifacts_dir = 'artifacts' # TODO don't hard code
        self.batch_id = None
        self.batch_order = None
        self.binary_input = None
        self.binary_output = None
        self.controller_args = {}
        self.controller_args['globals'] = {}
        self.created_by = None
        self.ctime = None
        self.data_dict = OrderedDict()
        self.dexy_version = Version.VERSION
        self.dirty = False
        self.document_key = None
        self.elapsed = 0
        self.ext = None
        self.final = None
        self.finish_time = None
        self.hashfunction = 'md5'
        self.initial = None
        self.inode = None
        self.input_data_dict = OrderedDict()
        self.is_last = False
        self.key = None
        self.log = logging.getLogger()
        self.logstream = ""
        self.mtime = None
        self.name = None
        self.source = None
        self.start_time = None
        self.state = 'new'
        self.stdout = None

    def is_complete(self):
        return str(self.state) == 'complete'

    @classmethod
    def retrieve(klass, hashstring):
        if not hasattr(klass, 'retrieved_artifacts'):
            klass.retrieved_artifacts = {}
        if klass.retrieved_artifacts.has_key(hashstring):
            return klass.retrieved_artifacts[hashstring]
        else:
            artifact = klass()
            artifact.hashstring = hashstring
            artifact.load()
            klass.retrieved_artifacts[hashstring] = artifact
            return artifact

    def load(self):
        self.load_meta()
        self.load_input()
        if self.is_complete() and not self.is_loaded():
            self.load_output()

    def load_inputs(self):
        for a in self.inputs():
            a.load()

    def save(self):
        if self.is_abstract():
            pass # For testing.
        elif not self.hashstring:
            raise Exception("can't persist an artifact without a hashstring!")
        else:
            self.save_meta()
            if self.is_complete() and not self.is_output_cached():
                try:
                    self.save_output()
                except IOError as e:
                    print "An error occured while saving %s" % self.key
                    raise e

    def is_abstract(self):
        return not hasattr(self, 'save_meta')

    def setup_initial(self):
        """
        Set up an initial artifact (the first artifact in a document's filter chain).
        """
        self._inputs = self.doc.input_artifacts()
        self.binary_input = (self.doc.ext in self.BINARY_EXTENSIONS)
        self.binary_output = self.binary_input
        self.ext = self.doc.ext
        self.initial = True

        if self.args.has_key('final'):
            self.final = self.args['final']
        elif os.path.basename(self.name).startswith("_"):
            self.final = False

        if not self.doc.virtual:
            stat_info = os.stat(self.name)
            self.ctime = stat_info[stat.ST_CTIME]
            self.mtime = stat_info[stat.ST_MTIME]
            self.inode = stat_info[stat.ST_INO]

        self.set_data(self.doc.initial_artifact_data())

        # TODO remove?
        if not self.data_dict:
            raise Exception("no data dict!")
        elif len(self.data_dict) == 0:
            raise Exception("data dict has len 0!")

        self.state = 'complete'

    def setup_from_filter_class(self):
        # cache filter class source code so it only has to be calculated once
        if not hasattr(self.filter_class, 'SOURCE_CODE'):
            # get source code of this filter class + all parent filter classes.
            source = ""
            klass = self.filter_class

            # get source code from filter class and all parent classes
            while klass != dexy.dexy_filter.DexyFilter:
                source += inspect.getsource(klass)
                klass = klass.__base__

            # and then get source code of DexyFilter class
            source += inspect.getsource(dexy.dexy_filter.DexyFilter)

            filter_class_source = source
            self.filter_class.SOURCE_CODE = self.compute_hash(filter_class_source)

        if not hasattr(self.filter_class, 'VERSION'):
            filter_version = self.filter_class.version(self.log)
            self.filter_class.VERSION = filter_version

        self.filter_name = self.filter_class.__name__
        self.filter_source = self.filter_class.SOURCE_CODE
        self.filter_version = self.filter_class.VERSION

        if self.final is None:
            self.final = self.filter_class.FINAL

    def setup_from_previous_artifact(self, previous_artifact):
        for a in ['final', 'mtime', 'ctime', 'inode']:
            setattr(self, a, getattr(previous_artifact, a))

        self._inputs.update(previous_artifact.inputs())
        # Need to loop over each artifact's inputs in case extra ones have been
        # added anywhere.
        for k, a in previous_artifact.inputs().iteritems():
            self._inputs.update(a.inputs())
        self.binary_input = previous_artifact.binary_output
        self.input_data_dict = previous_artifact.data_dict
        self.input_ext = previous_artifact.ext
        self.previous_artifact_filename = previous_artifact.filename()
        self.previous_artifact_filepath = previous_artifact.filepath()
        self.previous_canonical_filename = previous_artifact.canonical_filename(True)

        # The JSON output of previous artifact
        if not previous_artifact.binary_output:
            self.previous_cached_output_filepath = previous_artifact.cached_output_filepath()

        # Determine file extension of output
        if hasattr(self, 'next_filter_class'):
            next_inputs = self.next_filter_class.INPUT_EXTENSIONS
        else:
            next_inputs = None

        self.ext = self.filter_class.output_file_extension(
                previous_artifact.ext,
                self.name,
                next_inputs)

        self.binary_output = self.filter_class.BINARY
        if self.binary_output is None:
            self.set_binary_from_ext()

        self.state = 'setup'

    @classmethod
    def setup(klass, doc, artifact_key, filter_class = None, previous_artifact = None):
        """
        Create an Artifact instance and load all information needed to
        calculate its hashstring.
        """
        artifact = klass()
        artifact.key = artifact_key
        artifact.filter_class = filter_class

        # Add references for convenience
        artifact.artifacts_dir = doc.artifacts_dir
        artifact.controller_args = doc.controller.args
        artifact.hashfunction = doc.controller.args['hashfunction']
        artifact.db = doc.db
        artifact.doc = doc
        artifact.log = doc.log

        # These attributes are the same for all artifacts pertaining to a document
        artifact.args = doc.args
        artifact.batch_id = doc.batch_id
        artifact.document_key = doc.key()
        artifact.name = doc.name

        # Set batch order to next in sequence
        artifact.batch_order = artifact.db.next_batch_order(artifact.batch_id)

        next_filter_class = doc.next_filter_class()
        if next_filter_class:
            artifact.next_filter_name = next_filter_class.__name__
            artifact.next_filter_class = next_filter_class

        if previous_artifact:
            artifact.setup_from_previous_artifact(previous_artifact)
            artifact.setup_from_filter_class()
        else:
            artifact.setup_initial()

        artifact.set_hashstring()
        return artifact

    def run(self):
        start = time.time()

        if self.controller_args['nocache'] or not self.is_complete():
            # We have to actually run things...
            if not self.filter_class:
                self.filter_class = dexy.introspect.get_filter_by_name(self.filter_name, self.doc.__class__.filter_list)

            # Set up instance of filter.
            filter_instance = self.filter_class()
            filter_instance.artifact = self
            filter_instance.log = self.log

            try:
                filter_instance.process()
            except Exception as e:
                print "Error occurred while running", self.key
                x, y, tb = sys.exc_info()
                print "Original traceback:"
                traceback.print_tb(tb)
                pattern = os.path.join(self.artifacts_dir, self.hashstring)
                files_matching = glob.glob(pattern)
                if len(files_matching) > 0:
                    print "Here are working files which might have clues about this error:"
                    for f in files_matching:
                        print f
                raise e

            h = hashlib.sha512()

            if self.data_dict and len(self.data_dict) > 0:
                h.update(self.output_text().encode("utf-8"))

            elif self.is_canonical_output_cached:
                self.state = 'complete'
                self.save()

                f = open(self.filepath(), "rb")
                while True:
                    data = f.read(h.block_size)
                    if not data:
                        break
                    h.update(data)

            else:
                raise Exception("data neither in memory nor on disk")

            self.output_hash = h.hexdigest()
            self.logstream = self.doc.logstream.getvalue()
            self.state = 'complete'
            self.source = 'run'
            self.save()
        else:
            self.source = 'cache'
            self.log.debug("using cached artifact for %s" % self.key)

            # make sure additional artifacts are added to db
            for a in self.inputs().values():
                if a.created_by == self.key:
                    if not a.additional:
                        raise Exception("created_by should only apply to additional artifacts")
                    # TODO Should this be done in Artifact.retrieve?
                    a.batch_id = self.batch_id
                    self.db.append_artifact(a)

        self.elapsed = time.time() - start
        self.db.update_artifact(self)

    def add_additional_artifact(self, key_with_ext, ext):
        """create an 'additional' artifact with random hashstring"""
        new_artifact = self.__class__()
        new_artifact.key = key_with_ext
        if ext.startswith("."):
            new_artifact.ext = ext
        else:
            new_artifact.ext = ".%s" % ext
        new_artifact.final = True
        new_artifact.hashfunction = self.hashfunction
        new_artifact.additional = True
        new_artifact.set_binary_from_ext()
        new_artifact.artifacts_dir = self.artifacts_dir
        new_artifact.inode = self.hashstring
        new_artifact.created_by = self.document_key
        # TODO filter class source?

        # TODO this is duplicated in setup_from_previous_artifact, should reorganize
        for at in ['batch_id', 'document_key', 'mtime', 'ctime']:
            val = getattr(self, at)
            setattr(new_artifact, at, val)

        new_artifact.set_hashstring()
        self.add_input(key_with_ext, new_artifact)
        self.db.append_artifact(new_artifact) # append to db because not part of doc.artifacts
        return new_artifact

    def add_input(self, key, artifact):
        self._inputs[key] = artifact

    def inputs(self):
        return self._inputs

    def set_binary_from_ext(self):
        # TODO list more binary extensions or find better way to do this
        if self.ext in self.BINARY_EXTENSIONS:
            self.binary_output = True
        else:
            self.binary_output = False

    def set_data(self, data):
        self.data_dict['1'] = data

    def set_data_from_artifact(self):
        f = codecs.open(self.filepath(), "r", encoding="utf-8")
        self.data_dict['1'] = f.read()

    def is_loaded(self):
        return hasattr(self, 'data_dict') and len(self.data_dict) > 0

    def compute_hash(self, text):
        if self.hashfunction == 'md5':
            unicode_text = None
            if type(text) == unicode:
                unicode_text = text
            else:
                unicode_text = unicode(text, encoding="utf-8")
            h = hashlib.md5(unicode_text.encode("utf-8")).hexdigest()
        elif self.hashfunction == 'crc32':
            h = str(zlib.crc32(str(text)) & 0xffffffff)
        elif self.hashfunction == 'adler32':
            h = str(zlib.adler32(str(text)) & 0xffffffff)
        else:
            raise Exception("unexpected hash function %s" % self.hashfunction)
        return h

    def input_hashes(self):
        """
        Returns an OrderedDict of key, hashstring for each input artifact, sorted by key.
        """
        return OrderedDict((k, str(self.inputs()[k].hashstring)) for k in sorted(self.inputs()))

    def hash_dict(self):
        """
        Calculate and cache the elements used to compute the hashstring
        """
        if not hasattr(self.__class__, 'SOURCE_CODE'):
            artifact_class_source = inspect.getsource(self.__class__)
            artifact_py_source = inspect.getsource(Artifact)
            self.__class__.SOURCE_CODE = self.compute_hash(artifact_class_source + artifact_py_source)

        self.artifact_class_source = self.__class__.SOURCE_CODE

        if self.dirty:
            self.dirty_string = time.gmtime()

        hash_dict = OrderedDict()

        hash_dict['inputs'] = self.input_hashes()

        for k in self.HASH_WHITELIST:
            if self.__dict__.has_key(k):
                v = self.__dict__[k]
                if hasattr(v, 'items'):
                    hash_v = OrderedDict()
                    for k1 in sorted(v.keys()):
                        v1 = v[k1]
                        try:
                            if len(str(v1)) > 50:
                                raise Exception()
                            json.dumps(v1)
                            hash_v[str(k1)] = v1
                        except Exception:
                            # Use a hash if we will have problems saving to JSON
                            # or if the data is large (don't want to clutter up the DB,
                            # makes it harder to spot differences)
                            hash_v[str(k1)] = self.compute_hash(v1)
                else:
                    hash_v = str(v)
                hash_dict[str(k)] = hash_v
        return hash_dict

    def set_hashstring(self):
        if hasattr(self, 'hashstring'):
            raise Exception("setting hashstring twice")

        hash_data = str(self.hash_dict())
        self.hashstring = self.compute_hash(hash_data)

        try:
            original_document_key = self.document_key
            if not self.is_loaded():
                self.load()
            self.document_key = original_document_key
        except AttributeError as e:
            if not self.is_abstract():
                raise e
        except IOError as e:
            self.save_meta()


    def convert_if_not_unicode(self, s):
        if type(s) == unicode:
            return s
        else:
            return unicode(s, encoding="utf-8")

    def input_text(self):
        return u"".join([self.convert_if_not_unicode(v) for k, v in self.input_data_dict.items()])

    def output_text(self):
        return u"".join([self.convert_if_not_unicode(v) for k, v in self.data_dict.items()])

    def relative_refs(self, relative_to_file):
        """How to refer to this artifact, relative to another."""

        doc_dir = os.path.dirname(relative_to_file)
        return [
                os.path.relpath(self.key, doc_dir),
                os.path.relpath(self.long_canonical_filename(), doc_dir),
                "/%s" % self.key,
                "/%s" % self.long_canonical_filename()
        ]

    def use_canonical_filename(self):
        """Returns the canonical filename after saving contents under this name
        in the artifacts directory."""
        self.write_to_file(os.path.join(self.artifacts_dir,
                                        self.canonical_filename()))
        return self.canonical_filename()

    def write_to_file(self, filename):
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname) and not dirname == '':
            os.makedirs(dirname)
        shutil.copyfile(self.filepath(), filename)

    def work_filename(self):
        return "%s.work%s" % (self.hashstring, self.input_ext)

    def generate_workfile(self, work_filename = None):
        if not work_filename:
            work_filename = self.work_filename()
        work_path = os.path.join(self.artifacts_dir, work_filename)
        work_file = codecs.open(work_path, "w", encoding="utf-8")
        work_file.write(self.input_text())
        work_file.close()

    def temp_filename(self, ext):
        return "%s.work%s" % (self.hashstring, ext)

    def open_tempfile(self, ext):
        tempfile_path = os.path.join(self.artifacts_dir, self.temp_filename(ext))
        return codecs.open(tempfile_path, "w", encoding="utf-8")

    def temp_dir(self):
        return os.path.join(self.artifacts_dir, self.hashstring)

    def create_temp_dir(self, populate=False):
        tempdir = self.temp_dir()
        shutil.rmtree(tempdir, ignore_errors=True)
        os.mkdir(tempdir)

        if populate:
            # write all inputs to this directory, under their canonical names
            for input_artifact in self._inputs.values():
                filename = os.path.join(tempdir, input_artifact.canonical_filename())
                if os.path.exists(input_artifact.filepath()):
                    input_artifact.write_to_file(filename)
                    self.log.debug("Populating temp dir for %s with %s" % (self.key, filename))
                else:
                    self.log.warn("Skipping file %s for temp dir for %s, file does not exist (yet)" % (filename, self.key))

            # write the workfile to this directory under its canonical name
            previous = self.previous_artifact_filepath
            workfile = os.path.join(tempdir, self.previous_canonical_filename)
            if not os.path.exists(os.path.dirname(workfile)):
                os.makedirs(os.path.dirname(workfile))
            shutil.copyfile(previous, workfile)

    def canonical_dir(self, ignore_args = False):
        return os.path.dirname(self.name)

    def canonical_basename(self, ignore_args = False):
        return os.path.basename(self.canonical_filename(ignore_args))

    def canonical_filename(self, ignore_args = False):
        fn = os.path.splitext(self.key.split("|")[0])[0]

        if self.args.has_key('canonical-name') and not ignore_args:
            parent_dir = os.path.dirname(fn)
            return os.path.join(parent_dir, self.args['canonical-name'])
        elif self.args.has_key('postfix') and not ignore_args:
            return "%s%s%s" % (fn, self.ext, self.args['postfix'])
        else:
            return "%s%s" % (fn, self.ext)

    def long_canonical_filename(self):
        return "%s%s" % (self.key.replace("|", "-"), self.ext)

    def filename(self):
        """
        The filename where artifact content is stored, based on the hashstring.
        """
        if not hasattr(self, 'ext'):
            raise Exception("artifact %s has no ext" % self.key)
        return "%s%s" % (self.hashstring, self.ext)

    def filepath(self):
        """
        Full path (including artifacts dir location) to location where artifact content is stored.
        """
        return os.path.join(self.artifacts_dir, self.filename())

    def abs_filepath(self):
        return os.path.abspath(self.filepath())

    def breadcrumbs(self):
        """A list of parent dirs, plus the filename if it's not 'index.html'."""
        parent_dirs = os.path.dirname(self.canonical_filename()).split("/")

        if self.canonical_basename() == "index.html":
            result = parent_dirs
        else:
            parent_dirs.append(self.canonical_basename())
            result = parent_dirs

        if not result:
            result = []

        return result

    def titleized_name(self):
        if self.canonical_basename() == "index.html":
            return self.breadcrumbs()[-1].replace("-"," ").title()
        else:
            return os.path.splitext(self.canonical_basename())[0].replace("-"," ").title()

    def unique_key(self):
        return "%s:%s:%s" % (self.batch_id, self.document_key, self.key)

    def web_safe_document_key(self):
        # TODO this might not be unique
        return self.document_key.replace("/","-").replace("|", "-")

    def url(self):
        # TODO test for final
        return "/%s" % self.canonical_filename()

    def hyperlink(self, link_text = None):
        # TODO test for final
        if not link_text:
            link_text = self.canonical_basename()

        return """<a href="%s">%s</a>""" % (self.url(), link_text)

    def iframe(self, link_text = None, width = "600px", height = "300px"):
        # TODO test for final
        args = {
                'url' : self.url(),
                'hyperlink' : self.hyperlink(link_text),
                'width' : width,
                'height' : height
        }

        return """
<iframe src="%(url)s" width="%(width)s" height="%(height)s" style="border: thin solid gray;">
%(hyperlink)s
</iframe>
        """ % args

    def img(self):
        # TODO test for final
        return """<img src="/%s" alt="Image generated by dexy %s" />""" % (self.canonical_filename(), self.key)

    def has_sections(self):
        return (self.data_dict.keys() != ['1'])

    def relative_path_to_input(self, input_artifact):
        my_dir = os.path.dirname(self.name)
        input_dir = os.path.dirname(input_artifact.name)
        if my_dir == input_dir:
            relpath = ""
        else:
            relpath = os.path.relpath(input_dir, my_dir)
        return relpath

    def relative_key_for_input(self, input_artifact):
        relpath = self.relative_path_to_input(input_artifact)
        return os.path.join(relpath, os.path.basename(input_artifact.key))
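
# A minimal sketch of the caching idea behind hash_dict()/set_hashstring() above,
# not dexy's actual implementation: gather everything that could change the output
# (input hashes, extension, filter identity, ...) into an ordered mapping, hash its
# string form, and use the digest as the cache key/filename. The filter name below
# is a hypothetical example.
import hashlib
from collections import OrderedDict

def hashstring_for(metadata):
    """metadata: an OrderedDict of whitelisted attributes and input hashes."""
    return hashlib.md5(str(metadata).encode("utf-8")).hexdigest()

meta = OrderedDict()
meta['inputs'] = OrderedDict()         # key -> hashstring of each input artifact
meta['ext'] = '.html'
meta['filter_name'] = 'ExampleFilter'  # hypothetical
print(hashstring_for(meta))            # stable as long as nothing above changes
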
Exemple #54
0
    def keys(self):
        return list(OrderedDict.keys(self))
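
# A small standalone sketch (hypothetical class name) of where an override like the
# fragment above is useful: on Python 3, dict.keys() returns a view object, so a
# subclass that needs the old list behaviour (indexing, slicing) can convert
# explicitly.
from collections import OrderedDict

class ListKeysDict(OrderedDict):
    def keys(self):
        return list(OrderedDict.keys(self))

d = ListKeysDict([('a', 1), ('b', 2)])
assert d.keys()[0] == 'a'   # indexable, unlike a Python 3 keys view
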
Exemple #55
0
class InMemoryDataStore(Delegate):
    def __init__(self):
        super(InMemoryDataStore,self).__init__()
        self.tables = OrderedDict()
        self.transactions = []
        self.batch_count = 0
        self.in_batch = False

    def create(self):
        # Since data store is in memory, nothing needs to be done
        pass

    def drop(self):
        self.tables = OrderedDict()

    def truncate(self):
        self.drop()

    def get_count(self, type, row, columns=None, column_start=None, super_column=None, column_finish=None):
        return self.get_cf(type).get_count(row, columns=columns, column_start=column_start,
                                           column_finish=column_finish, super_column=super_column)

    def get_cf(self, cf_name):
        if cf_name not in self.tables:
            self.tables[cf_name] = self.create_cf(cf_name)
        return self.tables[cf_name]

    def create_cf(self, type, column_type=ASCII, super=False, index_columns=list()):
        self.tables[type] = ColumnFamily(type, column_type)
        return self.tables[type]
    
    def create_secondary_index(self, type, column, column_type=None):
        # DO NOTHING, for now we just do complete scans since memory is "fast enough"
        pass

    def cf_exists(self, type):
        return type in self.tables.keys()

    def insert(self, cf, row, columns):
        def execute():
            cf.insert(row, columns)

        if self.in_batch:
            self.transactions.append(execute)
        else:
            execute()

    def remove(self, cf, row, columns=None, super_column=None):
        def execute():
            cf.remove(row, columns=columns, super_column=super_column)
            
        if self.in_batch:
            self.transactions.append(execute)
        else:
            execute()

    def start_batch(self, queue_size = 0):
        self.in_batch = True
        self.batch_count += 1

    def commit_batch(self):
        self.batch_count -= 1
        if not self.batch_count:
            for item in self.transactions:
                item()
            self.transactions = []
            self.in_batch = False
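
# A standalone sketch of the batching pattern used by insert()/remove() and
# start_batch()/commit_batch() above: writes are wrapped in closures and queued
# while a batch is open, then replayed in order when the outermost commit fires.
# The class below is illustrative only and does not use the real Delegate or
# ColumnFamily dependencies.
class BatchingStore(object):
    def __init__(self):
        self.data = {}
        self.transactions = []
        self.batch_count = 0
        self.in_batch = False

    def insert(self, row, columns):
        def execute():
            self.data.setdefault(row, {}).update(columns)
        if self.in_batch:
            self.transactions.append(execute)   # defer until commit
        else:
            execute()

    def start_batch(self):
        self.in_batch = True
        self.batch_count += 1

    def commit_batch(self):
        self.batch_count -= 1
        if not self.batch_count:                # only the outermost commit flushes
            for op in self.transactions:
                op()
            self.transactions = []
            self.in_batch = False

store = BatchingStore()
store.start_batch()
store.insert('row1', {'name': 'alice'})
assert 'row1' not in store.data                 # still queued
store.commit_batch()
assert store.data['row1'] == {'name': 'alice'}  # flushed on commit
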