def handle(self, **options):
        since = get_last_change()
        writer = get_writer()
        try:
            while True:
                changes = settings.db.changes(since=since)
                since = changes["last_seq"]
                for changeset in changes["results"]:
                    try:
                        doc = settings.db[changeset["id"]]
                    except couchdb.http.ResourceNotFound:
                        continue
                    if "type" in doc and doc["type"] == "page":
                        print "indexing", doc["url"]
                        soup = BeautifulSoup(doc["content"])
                        if soup.body is None:
                            continue

                        desc = soup.findAll('meta', attrs={ "name": desc_re })

                        writer.update_document(
                                title=unicode(soup.title(text=True)[0]) if soup.title is not None and len(soup.title(text=True)) > 0 else doc["url"],
                                url=unicode(doc["url"]),
                                desc=unicode(desc[0]["content"]) if len(desc) > 0 and desc[0]["content"] is not None else u"",
                                rank=doc["rank"],
                                content=unicode(soup.title(text=True)[0] + "\n" + doc["url"] + "\n" + "".join(soup.body(text=True)))
                            )

                    writer.commit()
                    writer = get_writer()

                set_last_change(since)
        finally:
            set_last_change(since)
# Example #2
# 0
    def handle(self, **options):
        since = get_last_change()
        writer = get_writer()
        try:
            while True:
                changes = settings.db.changes(since=since)
                since = changes["last_seq"]
                for changeset in changes["results"]:
                    try:
                        doc = settings.db[changeset["id"]]
                    except couchdb.http.ResourceNotFound:
                        continue
                    if "type" in doc and doc["type"] == "page":
                        print "indexing", doc["url"]
                        soup = BeautifulSoup(doc["content"])
                        if soup.body is None:
                            continue

                        desc = soup.findAll('meta', attrs={"name": desc_re})

                        writer.update_document(
                            title=unicode(soup.title(
                                text=True)[0]) if soup.title is not None
                            and len(soup.title(text=True)) > 0 else doc["url"],
                            url=unicode(doc["url"]),
                            desc=unicode(desc[0]["content"]) if len(desc) > 0
                            and desc[0]["content"] is not None else u"",
                            rank=doc["rank"],
                            content=unicode(
                                soup.title(text=True)[0] + "\n" + doc["url"] +
                                "\n" + "".join(soup.body(text=True))))

                    writer.commit()
                    writer = get_writer()

                set_last_change(since)
        finally:
            set_last_change(since)
Energy = []
Mu = []
Muen = []
table = soup.find('table')
for row in table.findAll('tr'):
    col = row.findAll('td')
    if len(str(col).split()) == 3:
        Energy.append(col[0].find(text=True))
        Mu.append(col[1].find(text=True))
        Muen.append(col[2].find(text=True))
print col[1]

plt.loglog(Energy, Mu, label='Mu')
plt.loglog(Energy, Muen, label='Muen')
plt.title(soup.title(text=True))
plt.legend()
plt.show()

# Fetch the NIST mass-attenuation table for bone and parse it.
URL = 'http://physics.nist.gov/PhysRefData/XrayMassCoef/ComTab/bone.html'
response = urllib2.urlopen(URL)
try:
    html = response.read()
finally:
    # BUG FIX: the original leaked the HTTP connection; close it once read.
    response.close()
soup = BeautifulSoup(html)

# NOTE(review): this passage was truncated mid-loop in the paste (the `if`
# had no body, a SyntaxError). Completed to match the identical
# table-scraping passages elsewhere in this file.
Energy = []
Mu = []
Muen = []
table = soup.find('table')
for row in table.findAll('tr'):
    col = row.findAll('td')
    if len(str(col).split()) == 3:
        Energy.append(col[0].find(text=True))
        Mu.append(col[1].find(text=True))
        Muen.append(col[2].find(text=True))
Energy = []
Mu = []
Muen = []
table = soup.find('table')
for row in table.findAll('tr'):
    col = row.findAll('td')
    if len(str(col).split()) == 3:
        Energy.append(col[0].find(text=True))
        Mu.append(col[1].find(text=True))
        Muen.append(col[2].find(text=True))
print col[1]

plt.loglog(Energy, Mu, label='Mu')
plt.loglog(Energy, Muen, label='Muen')
plt.title(soup.title(text=True))
plt.legend()
plt.show()

# Fetch the NIST mass-attenuation table for bone and parse it.
URL = 'http://physics.nist.gov/PhysRefData/XrayMassCoef/ComTab/bone.html'
response = urllib2.urlopen(URL)
try:
    html = response.read()
finally:
    # BUG FIX: the original leaked the HTTP connection; close it once read.
    response.close()
soup = BeautifulSoup(html)

# NOTE(review): truncated mid-loop in the paste (dangling `if` with no body,
# a SyntaxError). Completed to match the identical table-scraping passages
# elsewhere in this file.
Energy = []
Mu = []
Muen = []
table = soup.find('table')
for row in table.findAll('tr'):
    col = row.findAll('td')
    if len(str(col).split()) == 3:
        Energy.append(col[0].find(text=True))
        Mu.append(col[1].find(text=True))
        Muen.append(col[2].find(text=True))