Example #1
    def analyze_citegraph_citation_age_plot(self, cites, degree, distribution, article):
        self.log.debug("    Writing citation age plot")
        this_year = datetime.datetime.today().year
        maxcites = 40
        maxage = this_year - 1954

        cited_by_age = []
        citations = []
        for case in sorted(degree.keys()):
            try:
                year = int(case[27:31])
                caseage = this_year - year
                if year < 1954:
                    continue
            except ValueError:
                # some malformed URIs/Celexnos
                continue
            if degree[case] <= maxcites:
                cited_by_age.append(caseage)
                citations.append(degree[case])

        cases_by_age = [0] * (maxage + 1)
        for citing, cited in cites:
            year = int(citing[27:31])
            caseage = this_year - year
            if year < 1954:
                continue
            if caseage < 0:
                continue
            cases_by_age[caseage] += 1

        fig = plt.figure()
        fig.set_size_inches(8, 5)
        ax = plt.subplot(211)
        plt.axis([0, maxage, 0, maxcites])
        plt.hexbin(cited_by_age, citations, gridsize=maxcites,
                   bins='log', cmap=cm.hot_r)
        # plt.scatter(age,citations)
        ax.set_title("Distribution of citations by age")
        ax.set_ylabel("# of citations")
        #cb = plt.colorbar()
        # cb.set_label('log(# of cases with # of citations)')
        ax = plt.subplot(212)
        ax.set_title("Distribution of cases by age")
        plt.axis([0, maxage, 0, max(cases_by_age)])
        plt.bar(na.arange(len(cases_by_age)) + 0.5, cases_by_age)

        filetype = self.graph_filetype
        if article:
            filename = "citation_age_plot_%s" % (article.split("/")[-1])
        else:
            filename = "citation_age_plot_all"
        filename = self.generic_path(filename, "analyzed", "." + filetype)

        plt.savefig(filename)
        plt.close()
        self.log.debug("    Created %s" % filename)
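
The case[27:31] slice above works because these case URIs embed a CELEX
number: after the 26-character http://lagen.nu/ext/celex/ prefix comes a
sector digit, then a four-digit year. A minimal sketch of that parsing,
using a hypothetical but correctly shaped URI:

uri = "http://lagen.nu/ext/celex/61999J0123"
assert uri[26] == "6"   # CELEX sector 6 = case law
year = int(uri[27:31])  # 1999; years before 1954 are skipped as malformed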
Example #2
 def set_image_palette(self, r, g, b):
     '''Given a set of RGB colors, create a list of 24-bit numbers representing the palette.
     I.e., RGB of (1,64,127) would be saved as 82047, or the number 00000001 01000000 01111111'''
     self.imagebuffer = array.array('l')
     self.clear_cal_display()
     self.pal = []
     # pack red into bits 16-23, green into bits 8-15, blue into bits 0-7
     for i in range(len(r)):
         rf = int(r[i])
         gf = int(g[i])
         bf = int(b[i])
         self.pal.append((rf << 16) | (gf << 8) | bf)
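
A quick standalone check of the bit packing described in the docstring,
using its documented RGB triple:

r, g, b = 1, 64, 127
packed = (r << 16) | (g << 8) | b
assert packed == 82047
assert format(packed, '024b') == '000000010100000001111111'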
Example #3
 def test_load_buffer_array(self):
     """Test loading from various buffer objects."""
     mixer.init()
     try:
         import array
         samples = b'\x00\xff' * 24
     arsample = array.array('b')
     if hasattr(arsample, 'frombytes'):
         # Python 3
         arsample.frombytes(samples)
     else:
         # Python 2
         arsample.fromstring(samples)
     # the array exposes the buffer protocol, so Sound can load from it
     snd = mixer.Sound(arsample)
         raw = snd.get_raw()
         self.assertTrue(isinstance(raw, bytes_))
         self.assertEqual(raw, samples)
     finally:
         mixer.quit()
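
Outside the test harness the same round trip can be sketched as follows (a
minimal sketch assuming a default-initialized mixer; Sound copies the bytes
it receives through the buffer protocol, so get_raw() returns them
unchanged):

import array
from pygame import mixer

mixer.init()
buf = array.array('b')
buf.frombytes(b'\x00\x7f' * 24)
snd = mixer.Sound(buf)  # any buffer-protocol object works
assert snd.get_raw() == buf.tobytes()
mixer.quit()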
Example #4
import struct
import array
import numpy as np
import matplotlib.pyplot as plt

number_of_images_to_read = 3
ROI_shape = (64, 64)
mat_shape = np.append(number_of_images_to_read, ROI_shape)
bytes_per_element = 8  # 'd' elements are 64-bit doubles
read_size = np.prod(ROI_shape) * number_of_images_to_read
fid_read = open(file_name, 'rb')  # file_name is assumed to be defined above
bla = fid_read.read(read_size * bytes_per_element)
fid_read.close()
unpack_length = len(bla) // bytes_per_element
bla2 = struct.unpack('d' * unpack_length, bla)  # returns a tuple...not an array :(
bla2_array = np.array(bla2)
bla2_array_image = bla2_array.reshape(mat_shape, order='C')
plt.imshow(bla2_array_image[0])
# read using numpy.fromfile:
bla_np = np.fromfile(file_name, 'd', count=read_size)
plt.imshow(bla_np.reshape(mat_shape)[0])
# using array.array
bla_double_array = array.array('d', bla)
########################################################################


########################################################################
#ASTROPY:
import astropy.table as table
import astropy.units as u
import numpy as np

# Create table from scratch
ra = np.random.random(5)
t = table.Table()
t.add_column(table.Column(name='ra', data=ra, unit=u.degree))
# Write out to file
t.write('myfile.fits')  # also supports HDF5, ASCII, etc.
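
Reading the table back is symmetric; a short sketch assuming the file
written above (the degree unit survives the round trip via the FITS TUNIT
keyword):

t2 = table.Table.read('myfile.fits')
print(t2['ra'])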
Example #5
class GraphAnalyze(object):
    def prep_annotation_file(self, basefile):
        goldstandard = self.eval_get_goldstandard(basefile)
        baseline_set = self.eval_get_ranked_set_baseline(basefile)
        baseline_map = self.eval_calc_map(
            self.eval_calc_aps(baseline_set, goldstandard))
        print("Baseline MAP %f" % baseline_map)
        self.log.info("Calculating ranked set (pagerank, unrestricted)")
        pagerank_set = self.eval_get_ranked_set(basefile,
                                                "pagerank",
                                                age_compensation=False,
                                                restrict_cited=False)
        pagerank_map = self.eval_calc_map(
            self.eval_calc_aps(pagerank_set, goldstandard))
        print("Pagerank MAP %f" % pagerank_map)
        sets = [{
            'label': 'Baseline',
            'data': baseline_set
        }, {
            'label': 'Gold standard',
            'data': goldstandard
        }, {
            'label': 'PageRank',
            'data': pagerank_set
        }]

        g = Graph()
        g.bind('dcterms', self.ns['dcterms'])
        g.bind('rinfoex', self.ns['rinfoex'])

        XHT_NS = "{http://www.w3.org/1999/xhtml}"
        tree = ET.parse(self.parsed_path(basefile))
        els = tree.findall("//" + XHT_NS + "div")
        articles = []
        for el in els:
            if el.attrib.get('typeof') == "eurlex:Article":
                article = str(el.attrib['id'][1:])
                articles.append(article)
        for article in articles:
            self.log.info("Results for article %s" % article)
            articlenode = URIRef("http://lagen.nu/ext/celex/12008E%03d" %
                                 int(article))
            resultsetcollectionnode = BNode()
            g.add((resultsetcollectionnode, RDF.type, RDF.List))
            rc = Collection(g, resultsetcollectionnode)
            g.add((articlenode, DCTERMS["relation"], resultsetcollectionnode))
            for s in sets:
                resultsetnode = BNode()
                listnode = BNode()
                rc.append(resultsetnode)
                g.add((resultsetnode, RDF.type,
                       RINFOEX["RelatedContentCollection"]))
                g.add((resultsetnode, DCTERMS["title"], Literal(s["label"])))
                g.add((resultsetnode, DCTERMS["hasPart"], listnode))
                c = Collection(g, listnode)
                g.add((listnode, RDF.type, RDF.List))
                if article in s['data']:
                    print(("    Set %s" % s['label']))
                    for result in s['data'][article]:
                        resnode = BNode()
                        g.add((resnode, DCTERMS["references"],
                               Literal(result[0])))
                        g.add((resnode, DCTERMS["title"], Literal(result[1])))
                        c.append(resnode)
                        print(("        %s" % result[1]))

        return self.graph_to_annotation_file(g, basefile)
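
    # The annotation graph built above has, per article, roughly this
    # shape (illustrative Turtle):
    #   <.../12008E263> dcterms:relation (
    #       [ a rinfoex:RelatedContentCollection ;
    #         dcterms:title "Baseline" ;
    #         dcterms:hasPart ( [ dcterms:references ... ;
    #                             dcterms:title ... ] ... ) ] ... ) .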

    def graph_to_image(self, graph, imageformat, filename):
        import pydot
        import rdflib
        dot = pydot.Dot()
        # dot.progs = {"dot": "c:/Program Files/Graphviz2.26.3/bin/dot.exe"}

        # code from rdflib.util.graph_to_dot, but adjusted to handle unicode
        nodes = {}
        for s, o in graph.subject_objects():
            for i in s, o:
                if i not in nodes:
                    # strip the rdflib repr() wrapper to get a bare label
                    if isinstance(i, rdflib.BNode):
                        nodes[i] = repr(i)[7:]
                    elif isinstance(i, rdflib.Literal):
                        nodes[i] = repr(i)[16:-1]
                    elif isinstance(i, rdflib.URIRef):
                        nodes[i] = repr(i)[22:-2]

        for s, p, o in graph.triples((None, None, None)):
            dot.add_edge(pydot.Edge(nodes[s], nodes[o], label=repr(p)[22:-2]))

        self.log.debug("Writing %s format to %s" % (imageformat, filename))
        util.ensure_dir(filename)
        dot.write(path=filename, prog="dot", format=imageformat)
        self.log.debug("Wrote %s" % filename)

    top_articles = []
    graph_filetype = "png"

    # returns a list of Article URIs

    def _articles(self, basefile):
        # Those articles we have gold standard sets for now
        self.top_articles = [
            'http://lagen.nu/ext/celex/12008E263',
            'http://lagen.nu/ext/celex/12008E101',
            'http://lagen.nu/ext/celex/12008E267',
            'http://lagen.nu/ext/celex/12008E107',
            'http://lagen.nu/ext/celex/12008E108',
            'http://lagen.nu/ext/celex/12008E296',
            'http://lagen.nu/ext/celex/12008E258',
            'http://lagen.nu/ext/celex/12008E045',
            'http://lagen.nu/ext/celex/12008E288',
            'http://lagen.nu/ext/celex/12008E034',
        ]

        # For evaluation, only return the 20 top-cited articles (which
        # analyze_article_citations incidentally computes for us). For
        # full-scale generation, use the commented-out code below.
        if not self.top_articles:
            self.top_articles = self.analyze_article_citations(quiet=True)
        return self.top_articles

        # For full-scale processing, return all articles present in e.g. TFEU:
        # XHT_NS = "{http://www.w3.org/1999/xhtml}"
        #tree = ET.parse(self.parsed_path(basefile))
        #els = tree.findall("//"+XHT_NS+"div")
        # for el in els:
        #    if 'typeof' in el.attrib and el.attrib['typeof'] == "eurlex:Article":
        #        yield el.attrib['about']

    # returns a RDFLib.Graph
    def _sameas(self):
        sameas = Graph()
        sameas_rdf = util.relpath(
            os.path.dirname(__file__) + "/../res/eut/sameas.n3")
        sameas.load(sameas_rdf, format="n3")
        return sameas

    def _query_cases(self, article, sameas):
        pred = util.ns['owl'] + "sameAs"
        q = ""
        if article:
            q += "{ ?subj eurlex:cites <%s> }\n" % article
            for equiv in sameas.objects(URIRef(article), URIRef(pred)):
                q += "    UNION { ?subj eurlex:cites <%s> }\n" % equiv

        return """
PREFIX eurlex:<http://lagen.nu/eurlex#>
PREFIX dcterms:<http://purl.org/dc/terms/>
SELECT DISTINCT ?subj WHERE {
    ?subj ?pred ?obj .
    %s
    FILTER (regex(str(?subj), "^http://lagen.nu/ext/celex/6"))
}
""" % (q)

    # Returns a python list of dicts
    def _query_cites(self,
                     article,
                     sameas,
                     restrict_citing,
                     restrict_cited,
                     year=None):
        if not year:
            year = datetime.datetime.today().year
        pred = util.ns['owl'] + "sameAs"
        q = ""
        if restrict_citing:
            q += "{ ?subj eurlex:cites <%s> }\n" % article
            for equiv in sameas.objects(URIRef(article), URIRef(pred)):
                q += "    UNION { ?subj eurlex:cites <%s> }\n" % equiv

        if restrict_cited:
            if q:
                q += ".\n"
            q += "{ ?obj eurlex:cites <%s> }\n" % article
            for equiv in sameas.objects(URIRef(article), URIRef(pred)):
                q += "    UNION { ?obj eurlex:cites <%s> }\n" % equiv

        return """
PREFIX eurlex:<http://lagen.nu/eurlex#>
PREFIX dcterms:<http://purl.org/dc/terms/>
SELECT DISTINCT ?subj ?pred ?obj ?celexnum WHERE {
    ?subj ?pred ?obj .
    ?subj eurlex:celexnum ?celexnum.
    %s
    FILTER (regex(str(?obj), "^http://lagen.nu/ext/celex/6") &&
            ?pred = eurlex:cites &&
            str(?celexnum) < str("6%s"@en))
}
""" % (q, year)

    def temp_analyze(self):
        store = TripleStore(self.config.storetype, self.config.storelocation,
                            self.config.storerepository)
        # sq = self._query_cites('http://lagen.nu/ext/celex/12008E045',self._sameas(),False, True, 2012)
        sq = self._query_cites(None, self._sameas(), False, False, 2012)
        print(sq)
        cites = store.select(sq, format="python")
        self.log.debug("    Citation graph contains %s citations" %
                       (len(cites)))

        # remove duplicate citations, self-citations and pinpoints
        # in citations
        citedict = {}
        for cite in cites:
            # print repr(cite)
            if "-" in cite['obj']:
                cite['obj'] = cite['obj'].split("-")[0]

            if (cite['subj'] != cite['obj']):
                citedict[(cite['subj'], cite['obj'])] = True

        self.log.debug("    Normalized graph contains %s citations" %
                       len(citedict))

        degree = {}
        for citing, cited in list(citedict.keys()):
            if citing not in degree:
                degree[citing] = []
            if cited not in degree:
                degree[cited] = []
            degree[cited].append(citing)

        return

    def analyze(self):
        articles = self.analyze_article_citations(num_of_articles=10)
        # articles = self._articles('tfeu')
        self.analyze_baseline_queries(articles)
        self.analyze_citation_graphs(articles)

    def analyze_article_citations(self, num_of_articles=20, quiet=False):
        """Prints and returns a list of the top 20 most important articles in the
        TFEU treaty, as determined by the number of citing cases."""

        # Create a mapping of article equivalencies, eg Art 28 TEC == Art 34 TFEU
        sameas = self._sameas()
        equivs = {}
        pred = util.ns['owl'] + "sameAs"
        for (s, o) in sameas.subject_objects(URIRef(pred)):
            equivs[str(o)] = str(s)
        self.log.debug("Defined %s equivalent article references" %
                       len(equivs))
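        # Illustrative: if sameas.n3 holds <.../12008E034> owl:sameAs
        # <.../11997E028>, equivs maps the older TEC URI to its TFEU
        # equivalent, so citations of either form are counted together.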

        # Select unique article citations
        store = TripleStore(self.config.storetype, self.config.storelocation,
                            self.config.storerepository)
        sq = """PREFIX eurlex:<http://lagen.nu/eurlex#>
                SELECT DISTINCT ?case ?article WHERE {
                    ?case eurlex:cites ?article .
                    FILTER (regex(str(?article), "^http://lagen.nu/ext/celex/1"))
             }"""
        cites = store.select(sq, format="python")

        citationcount = {}
        unmapped = {}
        self.log.debug("Going through %s unique citations" % len(cites))
        for cite in cites:
            article = cite['article'].split("-")[0]
            if "12008M" in article:
                pass
            elif article in equivs:
                article = equivs[article]
            else:
                if article in unmapped:
                    unmapped[article] += 1
                else:
                    unmapped[article] = 1
                article = None

            # Keep track of the number of citing cases
            if article:
                if article in citationcount:
                    citationcount[article] += 1
                else:
                    citationcount[article] = 1

        # Report the most common cites to older treaty articles that
        # we have no equivalents for in TFEU
        # sorted_unmapped = sorted(unmapped.iteritems(), key=itemgetter(1))[-num_of_articles:]
        # if not quiet:
        #    print "UNMAPPED:"
        #    pprint(sorted_unmapped)

        # Report and return the most cited articles
        sorted_citationcount = sorted(citationcount.items(),
                                      key=itemgetter(1))[-num_of_articles:]
        if not quiet:
            print("CITATION COUNTS:")
            pprint(sorted_citationcount)
        return [x[0] for x in reversed(sorted_citationcount)]

    def analyze_baseline_queries(self, analyzed_articles, num_of_keyterms=5):
        basefile = "tfeu"

        # Helper from http://effbot.org/zone/element-lib.htm

        def flatten(elem, include_tail=0):
            text = elem.text or ""
            for e in elem:
                text += flatten(e, 1)
            if include_tail and elem.tail:
                text += elem.tail
            return text
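
        # For example (illustrative markup): flatten() of the element
        # <p>a<b>c</b>d</p> returns "acd": the element's own text, each
        # child's flattened text and, in nested calls, the child's tail.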

        # step 1: Create a temporary whoosh index in order to find out
        # the most significant words for each article

        #ana = analysis.StandardAnalyzer()
        ana = analysis.StemmingAnalyzer()
        # vectorformat = formats.Frequency(ana)
        schema = fields.Schema(article=fields.ID(unique=True),
                               content=fields.TEXT(analyzer=ana, stored=True))

        st = RamStorage()
        tmpidx = st.create_index(schema)
        w = tmpidx.writer()

        XHT_NS = "{http://www.w3.org/1999/xhtml}"
        tree = ET.parse(self.parsed_path(basefile))
        els = tree.findall(".//" + XHT_NS + "div")
        articles = []
        for el in els:
            if el.attrib.get('typeof') == "eurlex:Article":
                text = util.normalize_space(flatten(el))
                article = str(el.attrib['about'])
                articles.append(article)
                w.update_document(article=article, content=text)
        w.commit()
        self.log.info("Indexed %d articles" % len(articles))

        # Step 2: Open the large whoosh index containing the text of
        # all cases. Then, for each article, use the 5 most distinctive terms
        # (filtering away numbers) to create a query against that index
        tempsearch = tmpidx.searcher()
        g = Graph()
        g.bind('celex', 'http://lagen.nu/ext/celex/')
        g.bind('ir', 'http://lagen.nu/informationretrieval#')
        IR = Namespace('http://lagen.nu/informationretrieval#')
        # celex:12008E264 ir:keyterm "blahonga"@en.

        outfile = self.generic_path("keyterms", "analyzed", ".tex")
        util.ensure_dir(outfile)
        fp = open(outfile, "w")
        fp.write("""
\\begin{tabular}{r|%s}
  \\hline
  \\textbf{Art.} & \\multicolumn{%s}{l}{\\textbf{Terms}} \\\\
  \\hline
""" % ("l" * num_of_keyterms, num_of_keyterms))

        for article in analyzed_articles:
            fp.write(str(int(article.split("E")[1])))
            r = tempsearch.search(query.Term("article", article))
            terms = r.key_terms("content", numterms=num_of_keyterms + 1)
            terms = [t[0] for t in terms
                     if not t[0].isdigit()][:num_of_keyterms]
            for term in terms:
                fp.write(" & " + term)
                g.add((URIRef(article), IR["keyterm"], Literal(term,
                                                               lang="en")))
            self.log.debug("Article %s:%r" % (article, terms))
            fp.write("\\\\\n")
        fp.write("""
  \\hline
\\end{tabular}
""")
        fp.close()
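
        # With num_of_keyterms=5, each row of the generated .tex table is
        # shaped like "101 & aid & market & undertaking & ... \\" (the
        # terms here are purely illustrative).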

        outfile = self.generic_path("keyterms", "analyzed", ".n3")
        util.ensure_dir(outfile)
        fp = open(outfile, "w")
        fp.write(g.serialize(format="n3"))
        fp.close()

    def analyze_citation_graphs(self, articles=None):
        # Basic setup
        # articles = self._articles('tfeu')[-1:]
        if not articles:
            articles = [None]
        if None not in articles:
            articles.append(None)
        this_year = datetime.datetime.today().year
        store = TripleStore(self.config.storetype, self.config.storelocation,
                            self.config.storerepository)
        sameas = self._sameas()
        distributions = []

        # For each article (and also for no article = the entire citation graph)
        for article in articles:
            # Get a list of all eligible cases (needed for proper degree distribution)
            sq = self._query_cases(article, sameas)
            # print sq
            cases = {}
            caserows = store.select(sq, format="python")
            for r in caserows:
                cases[r['subj']] = 0

            self.log.info("Creating graphs for %s (%s cases)" %
                          (article, len(cases)))
            # Step 1. SPARQL the graph on the form ?citing ?cited
            # (optionally restricting on citing a particular article)
            if article:
                sq = self._query_cites(article, sameas, True, False,
                                       this_year + 1)
            else:
                sq = self._query_cites(None, sameas, False, False,
                                       this_year + 1)

            cites = store.select(sq, format="python")
            self.log.debug("    Citation graph contains %s citations" %
                           (len(cites)))

            # remove duplicate citations, self-citations and pinpoints
            # in citations
            citedict = {}
            missingcases = {}
            for cite in cites:
                # print repr(cite)
                if "-" in cite['obj']:
                    cite['obj'] = cite['obj'].split("-")[0]

                if not cite['obj'] in cases:
                    # print "Case %s (cited in %s) does not exist!\n" % (cite['obj'],
                    # cite['subj'])
                    missingcases[cite['obj']] = True
                    continue

                if (cite['subj'] != cite['obj']):
                    citedict[(cite['subj'], cite['obj'])] = True

            self.log.debug(
                "    Normalized graph contains %s citations (%s cited cases not found)"
                % (len(citedict), len(missingcases)))
            # pprint(missingcases.keys()[:10])

            # Step 2. Dotify the list (maybe the direction of arrows from
            # cited to citing can improve results?) to create a citation
            # graph
            self.analyse_citegraph_graphviz(list(citedict.keys()), article)

            # Step 3. Create a degree distribution plot
            degree, distribution = self.analyze_citegraph_degree_distribution(
                cases, list(citedict.keys()), article)
            if article:
                distributions.append([article, distribution])

            # Step 4. Create a citation/age scatterplot (or rather hexbin)
            self.analyze_citegraph_citation_age_plot(list(citedict.keys()),
                                                     degree, distribution,
                                                     article)

        # Step 5. Create a combined degree distribution graph of the
        # distinct citation networks. Also add the degree distribution
        # of gold standard cases

        self.analyze_citegraph_combined_degree_distribution(distributions)

    def analyse_citegraph_graphviz(self, cites, article, generate_graph=False):
        """Create a dot file (that can later be processed with dot or gephi)"""
        from time import time

        filetype = self.graph_filetype
        if article:
            filename = "citegraph_%s" % article.split("/")[-1]
        else:
            filename = "citegraph_all"

        dot_filename = self.generic_path(filename, "analyzed", ".dot")
        self.log.debug("    Writing graphwiz citation graph for %s" % article)
        fp = open(dot_filename, "w")
        fp.write("""digraph G {
                    graph [
                          ];
""")
        cnt = 0
        for citing, cited in cites:
            cnt += 1
            citing = citing.split("/")[-1]
            cited = cited.split("/")[-1]
            try:
                fp.write("  \"%s\" -> \"%s\" ;\n" % (citing, cited))
            except Exception:
                # skip edges whose identifiers cannot be written
                pass
        fp.write("}")
        fp.close()
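
        # The resulting file contains one edge per citation, e.g. (with
        # purely illustrative CELEX numbers):
        #   "61999J0123" -> "61997J0456" ;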

        if generate_graph:
            graph_filename = self.generic_path(filename, "analyzed",
                                               "." + filetype)
            engine = "dot"
            start = time()
            cmdline = "%s -T%s -o%s %s" % (engine, filetype,
                                           graph_filename, dot_filename)
            self.log.debug("Running %s" % cmdline)
            p = subprocess.Popen(cmdline, shell=True)
            p.wait()
            self.log.info("Graph %s created in %.3f sec" %
                          (graph_filename, time() - start))

    def analyze_citegraph_degree_distribution(self, cases, cites, article):
        self.log.debug("    Writing degree distribution graph")
        degree = cases
        # self.log.debug("    %s cases, first elements %r" % (len(cases),cases.values()[:5]))
        # this_year = datetime.datetime.today().year
        maxcites = 40
        # maxage = this_year - 1954

        for citing, cited in cites:
            if citing not in degree:
                degree[citing] = 0
            if cited not in degree:
                degree[cited] = 0
            degree[cited] += 1

        distribution = [0] * (max(degree.values()) + 1)

        for value in list(degree.values()):
            distribution[value] += 1
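
        # For illustration: with cites [(A, B), (C, B)] the in-degree of B
        # becomes 2 and distribution[2] is incremented; distribution[n]
        # counts the cases that are cited exactly n times.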

        fig = plt.figure()
        fig.set_size_inches(8, 4)
        ax = plt.subplot(111)
        ax.set_ylabel('Number of cases being cited <x> times')
        ax.set_xlabel('Number of citing cases (max %s)' % maxcites)
        ax.set_title('Degree distribution of case citations')

        filetype = self.graph_filetype
        if article:
            filename = "degree_distribution_%s" % (article.split("/")[-1])
        else:
            filename = "degree_distribution_all"
        filename = self.generic_path(filename, "analyzed", "." + filetype)

        plt.plot(distribution[:maxcites])
        plt.savefig(filename)
        plt.close()
        self.log.debug("    Created %s" % filename)
        return (degree, distribution)

    def analyze_citegraph_combined_degree_distribution(self, distributions):
        self.log.debug("    Writing combined degree distribution graph")
        # this_year = datetime.datetime.today().year
        maxcites = 40
        # maxnumber = 1000
        # maxage = this_year - 1954

        fig = plt.figure()
        fig.set_size_inches(8, 4)
        ax = plt.subplot(111)
        ax.set_ylabel('Number of cases being cited <x> times')
        ax.set_xlabel('Number of citing cases (max %s)' % maxcites)
        ax.set_title(
            'Degree distribution of case citations concerning specific articles'
        )

        filetype = self.graph_filetype
        filename = "degree_distribution_combined"
        filename = self.generic_path(filename, "analyzed", "." + filetype)

        styles = []
        for i in range(1, 5):
            for j in (['-', '--', '-.', ':']):
                # for j in (['-','-','-','-','-']):
                styles.append((i, j))

        cnt = 0
        for (article, distribution) in distributions:
            label = article.split("/")[-1].split("E")[1]
            self.log.debug("        Plotting %s %r" %
                           (label, distribution[:4]))
            if label.isdigit():
                label = "Art. %s" % int(label)
            # label += " (%s uncited)" % distribution[0]
            lw, ls = styles[cnt % len(styles)]
            cnt += 1
            plt.plot(distribution[:maxcites],
                     label=label,
                     linestyle=ls,
                     linewidth=lw)

        # plt.axis([0,maxcites,0,maxnumber])
        plt.legend(loc='best',
                   markerscale=4,
                   prop={'size': 'x-small'},
                   ncol=int(len(distributions) / 6) + 1)

        plt.savefig(filename)
        plt.close()
        self.log.debug("    Created %s" % filename)

    def analyze_citegraph_citation_age_plot(self, cites, degree, distribution,
                                            article):
        self.log.debug("    Writing citation age plot")
        this_year = datetime.datetime.today().year
        maxcites = 40
        maxage = this_year - 1954

        cited_by_age = []
        citations = []
        for case in sorted(degree.keys()):
            try:
                year = int(case[27:31])
                caseage = this_year - year
                if year < 1954:
                    continue
            except ValueError:
                # some malformed URIs/Celexnos
                continue
            if degree[case] <= maxcites:
                cited_by_age.append(caseage)
                citations.append(degree[case])

        cases_by_age = [0] * (maxage + 1)
        for citing, cited in cites:
            year = int(citing[27:31])
            caseage = this_year - year
            if year < 1954:
                continue
            if caseage < 0:
                continue
            cases_by_age[caseage] += 1

        fig = plt.figure()
        fig.set_size_inches(8, 5)
        ax = plt.subplot(211)
        plt.axis([0, maxage, 0, maxcites])
        plt.hexbin(cited_by_age,
                   citations,
                   gridsize=maxcites,
                   bins='log',
                   cmap=cm.hot_r)
        # plt.scatter(age,citations)
        ax.set_title("Distribution of citations by age")
        ax.set_ylabel("# of citations")
        #cb = plt.colorbar()
        # cb.set_label('log(# of cases with # of citations)')
        ax = plt.subplot(212)
        ax.set_title("Distribution of cases by age")
        plt.axis([0, maxage, 0, max(cases_by_age)])
        plt.bar(na.arange(len(cases_by_age)) + 0.5, cases_by_age)

        filetype = self.graph_filetype
        if article:
            filename = "citation_age_plot_%s" % (article.split("/")[-1])
        else:
            filename = "citation_age_plot_all"
        filename = self.generic_path(filename, "analyzed", "." + filetype)

        plt.savefig(filename)
        plt.close()
        self.log.debug("    Created %s" % filename)