/
update-perl.py
67 lines (48 loc) · 1.91 KB
/
update-perl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from debian import deb822
import re
from rdflib import Graph, Literal, Namespace
from semantic_debian.namespaces import DCTERMS, PROJECT
from semantic_debian.udd import udd
# Base namespace from which CPAN distribution URIs are built.
CPANURI = Namespace("http://ontologi.es/cpan-data/dist/")

# Graph that collects the statements generated by this script.
g = Graph()

# A parenthesised single-argument print behaves the same on Python 2 and 3.
print("I: Mapping binary packages to source packages")
res = udd.query("SELECT package, source FROM packages")

# Binary package name -> source package name. Each result row is indexed
# positionally: column 0 is "package", column 1 is "source". If a package
# appears in several rows, the last row wins.
sources = dict((row[0], row[1]) for row in res)
def lookup_source(pkg):
    """Return the source package recorded for the binary package *pkg*.

    The lookup is made in the module-level ``sources`` dict.

    :param pkg: binary package name.
    :returns: the value stored for *pkg*, or ``None`` when there is no entry.
    """
    # A single hash lookup; avoids materialising the key list that
    # ``sources.keys()`` produces on Python 2.
    return sources.get(pkg)
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print("I: Mapping Perl modules to CPAN distributions")

# Columns in the index file are separated by runs of one or more spaces.
spaces = re.compile(" +")

# Perl module name (column 0) -> distribution name derived from column 2.
cpan_modules = {}
with open("/tmp/02packages.details.txt") as f:
    # Stream the file instead of loading every line into memory first.
    for lineno, line in enumerate(f):
        # The first nine lines are skipped.
        # NOTE(review): presumably the index header block - confirm.
        if lineno < 9:
            continue
        row = spaces.split(line.strip())
        # A blank or truncated line has no third column; skip it rather than
        # aborting the whole run with an IndexError.
        if len(row) < 3:
            continue
        # Column 2 is a "/"-separated path: keep its last component and cut
        # it at the final "-" (presumably dropping the version suffix).
        cpan_modules[row[0]] = row[2].split("/")[-1].rsplit("-", 1)[0]

print("I: Mapping source packages to Perl modules")

# Source package name -> set of values registered through add_mapping().
mapping = {}
def add_mapping(source, module):
    """Register *module* under *source* in the module-level ``mapping`` dict.

    Each key of ``mapping`` holds a set, so registering the same value twice
    for one source has no further effect.

    :param source: source package name used as the key.
    :param module: value to add to the set for *source*.
    """
    # setdefault creates the set on first use with a single dict lookup and
    # avoids the key list that ``mapping.keys()`` builds on Python 2.
    mapping.setdefault(source, set()).add(module)
# Walk the Debian package stanzas; for each one that carries a Perl-Modules
# field, register the CPAN distribution of every listed module against the
# stanza's source package.
with open('/tmp/PerlPackages') as debpkgs:
    for pkg in deb822.Packages.iter_paragraphs(debpkgs):
        if 'Perl-Modules' not in pkg:
            continue
        source = lookup_source(pkg['Package'])
        if source is None:
            # A parenthesised single-argument print behaves the same on
            # Python 2 and 3.
            print("W: No source package was found for %s" % (pkg['Package'],))
            continue
        # The field holds one entry per line; the module name is the text
        # before the first space of each entry.
        for entry in pkg['Perl-Modules'].strip().split('\n'):
            module = entry.strip().split(" ")[0].strip()
            # Test membership on the dict itself: ``.keys()`` would build a
            # list of every CPAN module on each iteration under Python 2.
            if module in cpan_modules:
                add_mapping(source, cpan_modules[module])
# Emit one graph statement per (source package, distribution) pair and write
# a plain-text summary to perl.map, then serialise the graph as Turtle.
# NOTE(review): nesting was reconstructed from unindented text; the summary
# line is assumed to be written once per source package - confirm.
with open("perl.map", "w") as out:
    for source in mapping:
        dists = mapping[source]
        for dist in dists:
            triple = (PROJECT[source], DCTERMS.relation, CPANURI[dist + "/project"])
            g.add(triple)
        # Summary line: "<source>: <count> <set of distributions>"
        summary = source + ": " + str(len(dists)) + " " + str(dists) + "\n"
        out.write(summary)

g.serialize("perl.ttl", format="turtle")