Example #1
    def analyze_citegraph_citation_age_plot(self, cites, degree, distribution, article):
        self.log.debug("    Writing citation age plot")
        this_year = datetime.datetime.today().year
        maxcites = 40
        maxage = this_year - 1954

        cited_by_age = []
        citations = []
        for case in sorted(degree.keys()):
            try:
                year = int(case[27:31])
                caseage = this_year - year
                if year < 1954:
                    continue
            except ValueError:
                # some malformed URIs/Celexnos
                continue
            if degree[case] <= maxcites:
                cited_by_age.append(caseage)
                citations.append(degree[case])

        cases_by_age = [0] * (maxage + 1)
        for citing, cited in cites:
            year = int(citing[27:31])
            caseage = this_year - year
            if year < 1954:
                continue
            if caseage < 0:
                continue
            cases_by_age[caseage] += 1

        fig = plt.figure()
        fig.set_size_inches(8, 5)
        ax = plt.subplot(211)
        plt.axis([0, maxage, 0, maxcites])
        plt.hexbin(cited_by_age, citations, gridsize=maxcites,
                   bins='log', cmap=cm.hot_r)
        # plt.scatter(age,citations)
        ax.set_title("Distribution of citations by age")
        ax.set_ylabel("# of citations")
        #cb = plt.colorbar()
        # cb.set_label('log(# of cases with # of citations)')
        ax = plt.subplot(212)
        ax.set_title("Distribution of cases by age")
        plt.axis([0, maxage, 0, max(cases_by_age)])
        plt.bar(na.arange(len(cases_by_age)) + 0.5, cases_by_age)

        filetype = self.graph_filetype
        if article:
            filename = "citation_age_plot_%s" % (article.split("/")[-1])
        else:
            filename = "citation_age_plot_all"
        filename = self.generic_path(filename, "analyzed", "." + filetype)

        plt.savefig(filename)
        plt.close()
        self.log.debug("    Created %s" % filename)
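
The case[27:31] slice above works because these case URIs embed a CELEX
number: after the 26-character http://lagen.nu/ext/celex/ prefix comes a
sector digit, then a four-digit year. A minimal sketch of that parsing,
using a hypothetical but correctly shaped URI:

uri = "http://lagen.nu/ext/celex/61999J0123"
assert uri[26] == "6"   # CELEX sector 6 = case law
year = int(uri[27:31])  # 1999; years before 1954 are skipped as malformed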
Example #2
 def set_image_palette(self, r, g, b):
     '''Given a set of RGB colors, create a list of 24-bit numbers representing the palette.
     I.e., RGB of (1,64,127) would be saved as 82047, or the number 00000001 01000000 01111111'''
     self.imagebuffer = array.array('l')
     self.clear_cal_display()
     self.pal = []
     # pack red into bits 16-23, green into bits 8-15, blue into bits 0-7
     for i in range(len(r)):
         rf = int(r[i])
         gf = int(g[i])
         bf = int(b[i])
         self.pal.append((rf << 16) | (gf << 8) | bf)
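
A quick standalone check of the bit packing described in the docstring,
using its documented RGB triple:

r, g, b = 1, 64, 127
packed = (r << 16) | (g << 8) | b
assert packed == 82047
assert format(packed, '024b') == '000000010100000001111111'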
Example #3
 def test_load_buffer_array(self):
     """Test loading from various buffer objects."""
     mixer.init()
     try:
         import array
         samples = b'\x00\xff' * 24
     arsample = array.array('b')
     if hasattr(arsample, 'frombytes'):
         # Python 3
         arsample.frombytes(samples)
     else:
         # Python 2
         arsample.fromstring(samples)
     # the array exposes the buffer protocol, so Sound can load from it
     snd = mixer.Sound(arsample)
         raw = snd.get_raw()
         self.assertTrue(isinstance(raw, bytes_))
         self.assertEqual(raw, samples)
     finally:
         mixer.quit()
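
Outside the test harness the same round trip can be sketched as follows (a
minimal sketch assuming a default-initialized mixer; Sound copies the bytes
it receives through the buffer protocol, so get_raw() returns them
unchanged):

import array
from pygame import mixer

mixer.init()
buf = array.array('b')
buf.frombytes(b'\x00\x7f' * 24)
snd = mixer.Sound(buf)  # any buffer-protocol object works
assert snd.get_raw() == buf.tobytes()
mixer.quit()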
Example #4
import struct
import array
import numpy as np
import matplotlib.pyplot as plt

number_of_images_to_read = 3
ROI_shape = (64, 64)
mat_shape = np.append(number_of_images_to_read, ROI_shape)
bytes_per_element = 8  # 'd' elements are 64-bit doubles
read_size = np.prod(ROI_shape) * number_of_images_to_read
fid_read = open(file_name, 'rb')  # file_name is assumed to be defined above
bla = fid_read.read(read_size * bytes_per_element)
fid_read.close()
unpack_length = len(bla) // bytes_per_element
bla2 = struct.unpack('d' * unpack_length, bla)  # returns a tuple...not an array :(
bla2_array = np.array(bla2)
bla2_array_image = bla2_array.reshape(mat_shape, order='C')
plt.imshow(bla2_array_image[0])
# read using numpy.fromfile:
bla_np = np.fromfile(file_name, 'd', count=read_size)
plt.imshow(bla_np.reshape(mat_shape)[0])
# using array.array
bla_double_array = array.array('d', bla)
########################################################################


########################################################################
#ASTROPY:
import astropy.table as table
import astropy.units as u
import numpy as np

# Create table from scratch
ra = np.random.random(5)
t = table.Table()
t.add_column(table.Column(name='ra', data=ra, unit=u.degree))
# Write out to file
t.write('myfile.fits')  # also supports HDF5, ASCII, etc.
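
Reading the table back is symmetric; a short sketch assuming the file
written above (the degree unit survives the round trip via the FITS TUNIT
keyword):

t2 = table.Table.read('myfile.fits')
print(t2['ra'])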
Example #5
class GraphAnalyze(object):
    def prep_annotation_file(self, basefile):
        goldstandard = self.eval_get_goldstandard(basefile)
        baseline_set = self.eval_get_ranked_set_baseline(basefile)
        baseline_map = self.eval_calc_map(
            self.eval_calc_aps(baseline_set, goldstandard))
        print("Baseline MAP %f" % baseline_map)
        self.log.info("Calculating ranked set (pagerank, unrestricted)")
        pagerank_set = self.eval_get_ranked_set(basefile,
                                                "pagerank",
                                                age_compensation=False,
                                                restrict_cited=False)
        pagerank_map = self.eval_calc_map(
            self.eval_calc_aps(pagerank_set, goldstandard))
        print("Pagerank MAP %f" % pagerank_map)
        sets = [{
            'label': 'Baseline',
            'data': baseline_set
        }, {
            'label': 'Gold standard',
            'data': goldstandard
        }, {
            'label': 'PageRank',
            'data': pagerank_set
        }]

        g = Graph()
        g.bind('dcterms', self.ns['dcterms'])
        g.bind('rinfoex', self.ns['rinfoex'])

        XHT_NS = "{http://www.w3.org/1999/xhtml}"
        tree = ET.parse(self.parsed_path(basefile))
        els = tree.findall("//" + XHT_NS + "div")
        articles = []
        for el in els:
            if el.attrib.get('typeof') == "eurlex:Article":
                article = str(el.attrib['id'][1:])
                articles.append(article)
        for article in articles:
            self.log.info("Results for article %s" % article)
            articlenode = URIRef("http://lagen.nu/ext/celex/12008E%03d" %
                                 int(article))
            resultsetcollectionnode = BNode()
            g.add((resultsetcollectionnode, RDF.type, RDF.List))
            rc = Collection(g, resultsetcollectionnode)
            g.add((articlenode, DCTERMS["relation"], resultsetcollectionnode))
            for s in sets:
                resultsetnode = BNode()
                listnode = BNode()
                rc.append(resultsetnode)
                g.add((resultsetnode, RDF.type,
                       RINFOEX["RelatedContentCollection"]))
                g.add((resultsetnode, DCTERMS["title"], Literal(s["label"])))
                g.add((resultsetnode, DCTERMS["hasPart"], listnode))
                c = Collection(g, listnode)
                g.add((listnode, RDF.type, RDF.List))
                if article in s['data']:
                    print(("    Set %s" % s['label']))
                    for result in s['data'][article]:
                        resnode = BNode()
                        g.add((resnode, DCTERMS["references"],
                               Literal(result[0])))
                        g.add((resnode, DCTERMS["title"], Literal(result[1])))
                        c.append(resnode)
                        print(("        %s" % result[1]))

        return self.graph_to_annotation_file(g, basefile)
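
    # The annotation graph built above has, per article, roughly this
    # shape (illustrative Turtle):
    #   <.../12008E263> dcterms:relation (
    #       [ a rinfoex:RelatedContentCollection ;
    #         dcterms:title "Baseline" ;
    #         dcterms:hasPart ( [ dcterms:references ... ;
    #                             dcterms:title ... ] ... ) ] ... ) .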

    def graph_to_image(self, graph, imageformat, filename):
        import pydot
        import rdflib
        dot = pydot.Dot()
        # dot.progs = {"dot": "c:/Program Files/Graphviz2.26.3/bin/dot.exe"}

        # code from rdflib.util.graph_to_dot, but adjusted to handle unicode
        nodes = {}
        for s, o in graph.subject_objects():
            for i in s, o:
                if i not in nodes:
                    # strip the rdflib repr() wrapper to get a bare label
                    if isinstance(i, rdflib.BNode):
                        nodes[i] = repr(i)[7:]
                    elif isinstance(i, rdflib.Literal):
                        nodes[i] = repr(i)[16:-1]
                    elif isinstance(i, rdflib.URIRef):
                        nodes[i] = repr(i)[22:-2]

        for s, p, o in graph.triples((None, None, None)):
            dot.add_edge(pydot.Edge(nodes[s], nodes[o], label=repr(p)[22:-2]))

        self.log.debug("Writing %s format to %s" % (imageformat, filename))
        util.ensure_dir(filename)
        dot.write(path=filename, prog="dot", format=imageformat)
        self.log.debug("Wrote %s" % filename)

    top_articles = []
    graph_filetype = "png"

    # returns a list of Article URIs

    def _articles(self, basefile):
        # Those articles we have gold standard sets for now
        self.top_articles = [
            'http://lagen.nu/ext/celex/12008E263',
            'http://lagen.nu/ext/celex/12008E101',
            'http://lagen.nu/ext/celex/12008E267',
            'http://lagen.nu/ext/celex/12008E107',
            'http://lagen.nu/ext/celex/12008E108',
            'http://lagen.nu/ext/celex/12008E296',
            'http://lagen.nu/ext/celex/12008E258',
            'http://lagen.nu/ext/celex/12008E045',
            'http://lagen.nu/ext/celex/12008E288',
            'http://lagen.nu/ext/celex/12008E034',
        ]

        # For evaluation, only return the 20 top-cited articles (which
        # analyze_article_citations incidentally computes for us). For
        # full-scale generation, use the commented-out code below.
        if not self.top_articles:
            self.top_articles = self.analyze_article_citations(quiet=True)
        return self.top_articles

        # For full-scale processing, return all articles present in e.g. TFEU:
        # XHT_NS = "{http://www.w3.org/1999/xhtml}"
        #tree = ET.parse(self.parsed_path(basefile))
        #els = tree.findall("//"+XHT_NS+"div")
        # for el in els:
        #    if 'typeof' in el.attrib and el.attrib['typeof'] == "eurlex:Article":
        #        yield el.attrib['about']

    # returns a RDFLib.Graph
    def _sameas(self):
        sameas = Graph()
        sameas_rdf = util.relpath(
            os.path.dirname(__file__) + "/../res/eut/sameas.n3")
        sameas.load(sameas_rdf, format="n3")
        return sameas

    def _query_cases(self, article, sameas):
        pred = util.ns['owl'] + "sameAs"
        q = ""
        if article:
            q += "{ ?subj eurlex:cites <%s> }\n" % article
            for equiv in sameas.objects(URIRef(article), URIRef(pred)):
                q += "    UNION { ?subj eurlex:cites <%s> }\n" % equiv

        return """
PREFIX eurlex:<http://lagen.nu/eurlex#>
PREFIX dcterms:<http://purl.org/dc/terms/>
SELECT DISTINCT ?subj WHERE {
    ?subj ?pred ?obj .
    %s
    FILTER (regex(str(?subj), "^http://lagen.nu/ext/celex/6"))
}
""" % (q)

    # Returns a python list of dicts
    def _query_cites(self,
                     article,
                     sameas,
                     restrict_citing,
                     restrict_cited,
                     year=None):
        if not year:
            year = datetime.datetime.today().year
        pred = util.ns['owl'] + "sameAs"
        q = ""
        if restrict_citing:
            q += "{ ?subj eurlex:cites <%s> }\n" % article
            for equiv in sameas.objects(URIRef(article), URIRef(pred)):
                q += "    UNION { ?subj eurlex:cites <%s> }\n" % equiv

        if restrict_cited:
            if q:
                q += ".\n"
            q += "{ ?obj eurlex:cites <%s> }\n" % article
            for equiv in sameas.objects(URIRef(article), URIRef(pred)):
                q += "    UNION { ?obj eurlex:cites <%s> }\n" % equiv

        return """
PREFIX eurlex:<http://lagen.nu/eurlex#>
PREFIX dcterms:<http://purl.org/dc/terms/>
SELECT DISTINCT ?subj ?pred ?obj ?celexnum WHERE {
    ?subj ?pred ?obj .
    ?subj eurlex:celexnum ?celexnum.
    %s
    FILTER (regex(str(?obj), "^http://lagen.nu/ext/celex/6") &&
            ?pred = eurlex:cites &&
            str(?celexnum) < str("6%s"@en))
}
""" % (q, year)

    def temp_analyze(self):
        store = TripleStore(self.config.storetype, self.config.storelocation,
                            self.config.storerepository)
        # sq = self._query_cites('http://lagen.nu/ext/celex/12008E045',self._sameas(),False, True, 2012)
        sq = self._query_cites(None, self._sameas(), False, False, 2012)
        print(sq)
        cites = store.select(sq, format="python")
        self.log.debug("    Citation graph contains %s citations" %
                       (len(cites)))

        # remove duplicate citations, self-citations and pinpoints
        # in citations
        citedict = {}
        for cite in cites:
            # print repr(cite)
            if "-" in cite['obj']:
                cite['obj'] = cite['obj'].split("-")[0]

            if (cite['subj'] != cite['obj']):
                citedict[(cite['subj'], cite['obj'])] = True

        self.log.debug("    Normalized graph contains %s citations" %
                       len(citedict))

        degree = {}
        for citing, cited in list(citedict.keys()):
            if citing not in degree:
                degree[citing] = []
            if cited not in degree:
                degree[cited] = []
            degree[cited].append(citing)

        return

    def analyze(self):
        articles = self.analyze_article_citations(num_of_articles=10)
        # articles = self._articles('tfeu')
        self.analyze_baseline_queries(articles)
        self.analyze_citation_graphs(articles)

    def analyze_article_citations(self, num_of_articles=20, quiet=False):
        """Prints and returns a list of the top 20 most important articles in the
        TFEU treaty, as determined by the number of citing cases."""

        # Create a mapping of article equivalencies, eg Art 28 TEC == Art 34 TFEU
        sameas = self._sameas()
        equivs = {}
        pred = util.ns['owl'] + "sameAs"
        for (s, o) in sameas.subject_objects(URIRef(pred)):
            equivs[str(o)] = str(s)
        self.log.debug("Defined %s equivalent article references" %
                       len(equivs))
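        # Illustrative: if sameas.n3 holds <.../12008E034> owl:sameAs
        # <.../11997E028>, equivs maps the older TEC URI to its TFEU
        # equivalent, so citations of either form are counted together.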

        # Select unique article citations
        store = TripleStore(self.config.storetype, self.config.storelocation,
                            self.config.storerepository)
        sq = """PREFIX eurlex:<http://lagen.nu/eurlex#>
                SELECT DISTINCT ?case ?article WHERE {
                    ?case eurlex:cites ?article .
                    FILTER (regex(str(?article), "^http://lagen.nu/ext/celex/1"))
             }"""
        cites = store.select(sq, format="python")

        citationcount = {}
        unmapped = {}
        self.log.debug("Going through %s unique citations" % len(cites))
        for cite in cites:
            article = cite['article'].split("-")[0]
            if "12008M" in article:
                pass
            elif article in equivs:
                article = equivs[article]
            else:
                if article in unmapped:
                    unmapped[article] += 1
                else:
                    unmapped[article] = 1
                article = None

            # Keep track of the number of citing cases
            if article:
                if article in citationcount:
                    citationcount[article] += 1
                else:
                    citationcount[article] = 1

        # Report the most common cites to older treaty articles that
        # we have no equivalents for in TFEU
        # sorted_unmapped = sorted(unmapped.iteritems(), key=itemgetter(1))[-num_of_articles:]
        # if not quiet:
        #    print "UNMAPPED:"
        #    pprint(sorted_unmapped)

        # Report and return the most cited articles
        sorted_citationcount = sorted(citationcount.items(),
                                      key=itemgetter(1))[-num_of_articles:]
        if not quiet:
            print("CITATION COUNTS:")
            pprint(sorted_citationcount)
        return [x[0] for x in reversed(sorted_citationcount)]

    def analyze_baseline_queries(self, analyzed_articles, num_of_keyterms=5):
        basefile = "tfeu"

        # Helper from http://effbot.org/zone/element-lib.htm

        def flatten(elem, include_tail=0):
            text = elem.text or ""
            for e in elem:
                text += flatten(e, 1)
            if include_tail and elem.tail:
                text += elem.tail
            return text
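
        # For example (illustrative markup): flatten() of the element
        # <p>a<b>c</b>d</p> returns "acd": the element's own text, each
        # child's flattened text and, in nested calls, the child's tail.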

        # step 1: Create a temporary whoosh index in order to find out
        # the most significant words for each article

        #ana = analysis.StandardAnalyzer()
        ana = analysis.StemmingAnalyzer()
        # vectorformat = formats.Frequency(ana)
        schema = fields.Schema(article=fields.ID(unique=True),
                               content=fields.TEXT(analyzer=ana, stored=True))

        st = RamStorage()
        tmpidx = st.create_index(schema)
        w = tmpidx.writer()

        XHT_NS = "{http://www.w3.org/1999/xhtml}"
        tree = ET.parse(self.parsed_path(basefile))
        els = tree.findall(".//" + XHT_NS + "div")
        articles = []
        for el in els:
            if el.attrib.get('typeof') == "eurlex:Article":
                text = util.normalize_space(flatten(el))
                article = str(el.attrib['about'])
                articles.append(article)
                w.update_document(article=article, content=text)
        w.commit()
        self.log.info("Indexed %d articles" % len(articles))

        # Step 2: Open the large whoosh index containing the text of
        # all cases. Then, for each article, use the 5 most distinctive terms
        # (filtering away numbers) to create a query against that index
        tempsearch = tmpidx.searcher()
        g = Graph()
        g.bind('celex', 'http://lagen.nu/ext/celex/')
        g.bind('ir', 'http://lagen.nu/informationretrieval#')
        IR = Namespace('http://lagen.nu/informationretrieval#')
        # celex:12008E264 ir:keyterm "blahonga"@en.

        outfile = self.generic_path("keyterms", "analyzed", ".tex")
        util.ensure_dir(outfile)
        fp = open(outfile, "w")
        fp.write("""
\\begin{tabular}{r|%s}
  \\hline
  \\textbf{Art.} & \\multicolumn{%s}{l}{\\textbf{Terms}} \\\\
  \\hline
""" % ("l" * num_of_keyterms, num_of_keyterms))

        for article in analyzed_articles:
            fp.write(str(int(article.split("E")[1])))
            r = tempsearch.search(query.Term("article", article))
            terms = r.key_terms("content", numterms=num_of_keyterms + 1)
            terms = [t[0] for t in terms
                     if not t[0].isdigit()][:num_of_keyterms]
            for term in terms:
                fp.write(" & " + term)
                g.add((URIRef(article), IR["keyterm"], Literal(term,
                                                               lang="en")))
            self.log.debug("Article %s:%r" % (article, terms))
            fp.write("\\\\\n")
        fp.write("""
  \\hline
\\end{tabular}
""")
        fp.close()
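
        # With num_of_keyterms=5, each row of the generated .tex table is
        # shaped like "101 & aid & market & undertaking & ... \\" (the
        # terms here are purely illustrative).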

        outfile = self.generic_path("keyterms", "analyzed", ".n3")
        util.ensure_dir(outfile)
        fp = open(outfile, "w")
        fp.write(g.serialize(format="n3"))
        fp.close()

    def analyze_citation_graphs(self, articles=None):
        # Basic setup
        # articles = self._articles('tfeu')[-1:]
        if not articles:
            articles = [None]
        if None not in articles:
            articles.append(None)
        this_year = datetime.datetime.today().year
        store = TripleStore(self.config.storetype, self.config.storelocation,
                            self.config.storerepository)
        sameas = self._sameas()
        distributions = []

        # For each article (and also for no article = the entire citation graph)
        for article in articles:
            # Get a list of all eligible cases (needed for proper degree distribution)
            sq = self._query_cases(article, sameas)
            # print sq
            cases = {}
            caserows = store.select(sq, format="python")
            for r in caserows:
                cases[r['subj']] = 0

            self.log.info("Creating graphs for %s (%s cases)" %
                          (article, len(cases)))
            # Step 1. SPARQL the graph on the form ?citing ?cited
            # (optionally restricting on citing a particular article)
            if article:
                sq = self._query_cites(article, sameas, True, False,
                                       this_year + 1)
            else:
                sq = self._query_cites(None, sameas, False, False,
                                       this_year + 1)

            cites = store.select(sq, format="python")
            self.log.debug("    Citation graph contains %s citations" %
                           (len(cites)))

            # remove duplicate citations, self-citations and pinpoints
            # in citations
            citedict = {}
            missingcases = {}
            for cite in cites:
                # print repr(cite)
                if "-" in cite['obj']:
                    cite['obj'] = cite['obj'].split("-")[0]

                if not cite['obj'] in cases:
                    # print "Case %s (cited in %s) does not exist!\n" % (cite['obj'],
                    # cite['subj'])
                    missingcases[cite['obj']] = True
                    continue

                if (cite['subj'] != cite['obj']):
                    citedict[(cite['subj'], cite['obj'])] = True

            self.log.debug(
                "    Normalized graph contains %s citations (%s cited cases not found)"
                % (len(citedict), len(missingcases)))
            # pprint(missingcases.keys()[:10])

            # Step 2. Dotify the list (maybe the direction of arrows from
            # cited to citing can improve results?) to create a citation
            # graph
            self.analyse_citegraph_graphviz(list(citedict.keys()), article)

            # Step 3. Create a degree distribution plot
            degree, distribution = self.analyze_citegraph_degree_distribution(
                cases, list(citedict.keys()), article)
            if article:
                distributions.append([article, distribution])

            # Step 4. Create a citation/age scatterplot (or rather hexbin)
            self.analyze_citegraph_citation_age_plot(list(citedict.keys()),
                                                     degree, distribution,
                                                     article)

        # Step 5. Create a combined degree distribution graph of the
        # distinct citation networks. Also add the degree distribution
        # of gold standard cases

        self.analyze_citegraph_combined_degree_distribution(distributions)

    def analyse_citegraph_graphviz(self, cites, article, generate_graph=False):
        """Create a dot file (that can later be processed with dot or gephi)"""
        from time import time

        filetype = self.graph_filetype
        if article:
            filename = "citegraph_%s" % article.split("/")[-1]
        else:
            filename = "citegraph_all"

        dot_filename = self.generic_path(filename, "analyzed", ".dot")
        self.log.debug("    Writing graphwiz citation graph for %s" % article)
        fp = open(dot_filename, "w")
        fp.write("""digraph G {
                    graph [
                          ];
""")
        cnt = 0
        for citing, cited in cites:
            cnt += 1
            citing = citing.split("/")[-1]
            cited = cited.split("/")[-1]
            try:
                fp.write("  \"%s\" -> \"%s\" ;\n" % (citing, cited))
            except Exception:
                # skip edges whose identifiers cannot be written
                pass
        fp.write("}")
        fp.close()
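
        # The resulting file contains one edge per citation, e.g. (with
        # purely illustrative CELEX numbers):
        #   "61999J0123" -> "61997J0456" ;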

        if generate_graph:
            graph_filename = self.generic_path(filename, "analyzed",
                                               "." + filetype)
            engine = "dot"
            start = time()
            cmdline = "%s -T%s -o%s %s" % (engine, filetype,
                                           graph_filename, dot_filename)
            self.log.debug("Running %s" % cmdline)
            p = subprocess.Popen(cmdline, shell=True)
            p.wait()
            self.log.info("Graph %s created in %.3f sec" %
                          (graph_filename, time() - start))

    def analyze_citegraph_degree_distribution(self, cases, cites, article):
        self.log.debug("    Writing degree distribution graph")
        degree = cases
        # self.log.debug("    %s cases, first elements %r" % (len(cases),cases.values()[:5]))
        # this_year = datetime.datetime.today().year
        maxcites = 40
        # maxage = this_year - 1954

        for citing, cited in cites:
            if citing not in degree:
                degree[citing] = 0
            if cited not in degree:
                degree[cited] = 0
            degree[cited] += 1

        distribution = [0] * (max(degree.values()) + 1)

        for value in list(degree.values()):
            distribution[value] += 1
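
        # For illustration: with cites [(A, B), (C, B)] the in-degree of B
        # becomes 2 and distribution[2] is incremented; distribution[n]
        # counts the cases that are cited exactly n times.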

        fig = plt.figure()
        fig.set_size_inches(8, 4)
        ax = plt.subplot(111)
        ax.set_ylabel('Number of cases being cited <x> times')
        ax.set_xlabel('Number of citing cases (max %s)' % maxcites)
        ax.set_title('Degree distribution of case citations')

        filetype = self.graph_filetype
        if article:
            filename = "degree_distribution_%s" % (article.split("/")[-1])
        else:
            filename = "degree_distribution_all"
        filename = self.generic_path(filename, "analyzed", "." + filetype)

        plt.plot(distribution[:maxcites])
        plt.savefig(filename)
        plt.close()
        self.log.debug("    Created %s" % filename)
        return (degree, distribution)

    def analyze_citegraph_combined_degree_distribution(self, distributions):
        self.log.debug("    Writing combined degree distribution graph")
        # this_year = datetime.datetime.today().year
        maxcites = 40
        # maxnumber = 1000
        # maxage = this_year - 1954

        fig = plt.figure()
        fig.set_size_inches(8, 4)
        ax = plt.subplot(111)
        ax.set_ylabel('Number of cases being cited <x> times')
        ax.set_xlabel('Number of citing cases (max %s)' % maxcites)
        ax.set_title(
            'Degree distribution of case citations concerning specific articles'
        )

        filetype = self.graph_filetype
        filename = "degree_distribution_combined"
        filename = self.generic_path(filename, "analyzed", "." + filetype)

        styles = []
        for i in range(1, 5):
            for j in (['-', '--', '-.', ':']):
                # for j in (['-','-','-','-','-']):
                styles.append((i, j))

        cnt = 0
        for (article, distribution) in distributions:
            label = article.split("/")[-1].split("E")[1]
            self.log.debug("        Plotting %s %r" %
                           (label, distribution[:4]))
            if label.isdigit():
                label = "Art. %s" % int(label)
            # label += " (%s uncited)" % distribution[0]
            lw, ls = styles[cnt % len(styles)]
            cnt += 1
            plt.plot(distribution[:maxcites],
                     label=label,
                     linestyle=ls,
                     linewidth=lw)

        # plt.axis([0,maxcites,0,maxnumber])
        plt.legend(loc='best',
                   markerscale=4,
                   prop={'size': 'x-small'},
                   ncol=int(len(distributions) / 6) + 1)

        plt.savefig(filename)
        plt.close()
        self.log.debug("    Created %s" % filename)

    def analyze_citegraph_citation_age_plot(self, cites, degree, distribution,
                                            article):
        self.log.debug("    Writing citation age plot")
        this_year = datetime.datetime.today().year
        maxcites = 40
        maxage = this_year - 1954

        cited_by_age = []
        citations = []
        for case in sorted(degree.keys()):
            try:
                year = int(case[27:31])
                caseage = this_year - year
                if year < 1954:
                    continue
            except ValueError:
                # some malformed URIs/Celexnos
                continue
            if degree[case] <= maxcites:
                cited_by_age.append(caseage)
                citations.append(degree[case])

        cases_by_age = [0] * (maxage + 1)
        for citing, cited in cites:
            year = int(citing[27:31])
            caseage = this_year - year
            if year < 1954:
                continue
            if caseage < 0:
                continue
            cases_by_age[caseage] += 1

        fig = plt.figure()
        fig.set_size_inches(8, 5)
        ax = plt.subplot(211)
        plt.axis([0, maxage, 0, maxcites])
        plt.hexbin(cited_by_age,
                   citations,
                   gridsize=maxcites,
                   bins='log',
                   cmap=cm.hot_r)
        # plt.scatter(age,citations)
        ax.set_title("Distribution of citations by age")
        ax.set_ylabel("# of citations")
        #cb = plt.colorbar()
        # cb.set_label('log(# of cases with # of citations)')
        ax = plt.subplot(212)
        ax.set_title("Distribution of cases by age")
        plt.axis([0, maxage, 0, max(cases_by_age)])
        plt.bar(na.arange(len(cases_by_age)) + 0.5, cases_by_age)

        filetype = self.graph_filetype
        if article:
            filename = "citation_age_plot_%s" % (article.split("/")[-1])
        else:
            filename = "citation_age_plot_all"
        filename = self.generic_path(filename, "analyzed", "." + filetype)

        plt.savefig(filename)
        plt.close()
        self.log.debug("    Created %s" % filename)