Example #1
0
def clean(db_loc):
    """
    Delete orphaned atom, pseudoatom and heritage records from a fragment db.

    Some fragments are duplicates, or are not included in the db, but there
    can still be atom, pseudoatom and heritage records that refer to them.
    Every such record whose ``frag`` reference is not found in the fragment
    table is deleted. Missing tables are created first, so the function can
    also be run against a fresh database file.

    :param db_loc: location of the SQLite database file to clean
    :return: None
    """
    db = SqliteExtDatabase(
        db_loc,
        pragmas={
            'cache_size': -1024 * 64,  # 64MB page-cache.
            'journal_mode': 'wal',  # Use WAL-mode (you should always use this!).
            'foreign_keys': 0,
            'wal_autocheckpoint': 10,
        })

    # Open the connection explicitly so that it is always released again in
    # the ``finally`` clause, even when one of the statements below fails.
    db.connect()
    try:
        # get the models
        Fragment, Heritage, PseudoAtoms, Atoms = lib_read(db)
        for model in (Fragment, Heritage, PseudoAtoms, Atoms):
            model.create_table(safe=True)

        logger.debug("Trying to clean up the database:")
        dependents = (
            ("ATOM", Atoms),
            ("PSEUDO_ATOM", PseudoAtoms),
            ("HERITAGE", Heritage),
        )
        for label, model in dependents:
            logger.debug("Deleting missing %s records", label)
            # One transaction per table, so each table's cleanup is committed
            # independently of the others.
            with db.atomic():
                orphans = model.delete().where(
                    model.frag.not_in(Fragment.select()))
                orphans.execute()
    finally:
        db.close()

    logger.info("Done.")
Example #2
0

# Open the connection and create a table for each of the listed models.
db.connect()
db.create_tables([
    Journal, PLOSArticle, ArticleType, CoAuthorPLOSArticle,
    CorrespondingAuthor, JATSType, Affiliations, Country, SubjectsPLOSArticle,
    Subjects
])

# Corpus receives ``starterdir`` when args.starter is truthy, otherwise None.
corpus_dir = starterdir if args.starter else None
all_files = Corpus(corpus_dir)
# Progress-bar total: the whole corpus unless args.random limits the count.
num_files = len(all_files) if args.random is None else args.random

# islice(all_files, None) yields every article, so args.random=None means
# "process everything".
# NOTE(review): islice takes the *first* args.random articles in iteration
# order - confirm that Corpus already randomises if a random sample is wanted.
for article in tqdm(islice(all_files, args.random), total=num_files):
    journal_name = journal_title_dict[article.journal.upper()]
    # Get-or-create: try to insert the row; if that raises IntegrityError,
    # roll back and fetch the existing row instead.
    # NOTE(review): the rollback is issued on ``db`` while the ``atomic``
    # handle bound by the ``with`` statement is never used - confirm this is
    # the intended way to undo the failed insert.
    with db.atomic() as atomic:
        try:
            journal = Journal.create(journal=journal_name)
        except IntegrityError:
            db.rollback()
            journal = Journal.get(Journal.journal == journal_name)
    # Same get-or-create pattern for the article type.
    with db.atomic() as atomic:
        try:
            article_type = ArticleType.create(article_type=article.plostype)
        except IntegrityError:
            db.rollback()
            article_type = ArticleType.get(
                ArticleType.article_type == article.plostype)
    with db.atomic() as atomic:
        try:
            j_type = JATSType.create(jats_type=article.type_)
Example #3
0
	lang = CharField()
	created_at = DateTimeField()
	location = CharField()
	tweet_sentiment = IntegerField(default = 0)

	class Meta:
		primary_key = False


# Parse the AFINN file into the lookup that get_tweet_value() is given below.
word_costs = parse_afinn_file("AFINN-111.txt")

db.connect()
db.create_tables([Tweet])

items = parse_tweets_file("three_minutes_tweets.json.txt")
# A single transaction wraps every batch insert.
with db.atomic():
	for batch in chunk(items, 100):
		for tweet in batch:
			# Only tweets whose "lang" is "en" get a computed sentiment;
			# the others are inserted without that key.
			if tweet.get("lang") != "en":
				continue
			score = get_tweet_value(tweet.get("tweet_text"), word_costs)
			tweet.update(tweet_sentiment = score)
		# The whole batch is inserted, scored or not.
		Tweet.insert_many(batch).execute()

db.close()

"""
db.connect()

all_tweets = Tweet.select()
for tweet in all_tweets:
	tweet_sentiment = get_tweet_value(tweet.tweet_text, word_costs)
	if tweet_sentiment != 0:
Example #4
0
def libgen(mol_list, output_name):
    """
    Generate a database format library of fragments from a mol, a list of mol
    objects, or a .smi / .sdf file.

    Every molecule is fragmented, the resulting fragment, heritage, atom and
    pseudoatom records are written to the database in chunks of 200 rows, and
    ``clean`` is then run on the same database.

    :param mol_list: list of molecules, a single molecule, or a filename of molecules to read
    :type mol_list: str|Chem.Mol|[Chem.Mol]
    :param output_name: location of the SQLite database to write to
    :type output_name: str
    :return: 1 once the library has been written and cleaned
    :raises TypeError: if ``mol_list`` is none of the supported inputs, or is
        a list containing something other than mol objects
    """
    # if a file not a list then read into list
    if isinstance(mol_list, str) and mol_list.endswith(".smi"):
        mol_list = Chem.SmilesMolSupplier(mol_list,
                                          delimiter="\t",
                                          titleLine=False)
    elif isinstance(mol_list, str) and mol_list.endswith(".sdf"):
        mol_list = Chem.SDMolSupplier(mol_list)
    elif isinstance(mol_list, Chem.Mol):
        mol_list = [mol_list]
    elif isinstance(mol_list, list):
        # Validate every element with a real exception: an ``assert`` is
        # stripped under ``python -O`` and indexing [0] fails on an empty list.
        if not all(isinstance(mol, Chem.Mol) for mol in mol_list):
            raise TypeError(
                "Did you provide a list of mol objects? Input type error.")
    else:
        raise TypeError(
            "Did you provide a list of mol objects? Input type error.")

    fragment_dict_deque = deque()
    heritage_dict_deque = deque()
    atoms_dict_deque = deque()
    pseudoatoms_dict_deque = deque()
    logger.info("Fragmenting:")
    n = len(mol_list)
    t0 = time.time()
    for done, mol in enumerate(mol_list, start=1):
        if mol is None:
            # RDKit suppliers yield None for records they could not parse.
            logger.warning("Skipping molecule %d/%d: it could not be read",
                           done, n)
            continue
        re_mol = RecomposerMol.fromMol(mol=mol)
        # NOTE(review): the meaning of the literal 7 is not visible here -
        # presumably a fragment size/depth limit; confirm against
        # RecomposerMol.get_all_fragments.
        (frag_list, heritage_list, atoms_list,
         pseudo_atoms_list) = re_mol.get_all_fragments(7)
        fragment_dict_deque.extend(frag_list)
        heritage_dict_deque.extend(heritage_list)
        atoms_dict_deque.extend(atoms_list)
        pseudoatoms_dict_deque.extend(pseudo_atoms_list)
        # Molecules handled so far, and the mean time per molecule in ms.
        logger.info("DONE: %d/%d %.f", done, n,
                    1000 * (time.time() - t0) / done)

    logger.info("Done")
    logger.info("Saving:")

    # create the database for the output
    db = SqliteExtDatabase(
        output_name,
        pragmas={
            'cache_size': -1024 * 64,  # 64MB page-cache.
            'journal_mode': 'wal',  # Use WAL-mode (you should always use this!).
            'foreign_keys': 0,
            'wal_autocheckpoint': 10,
        })

    db.connect()
    try:
        # get the models
        Fragment, Heritage, PseudoAtoms, Atoms = lib_read(db)
        for model in (Fragment, Heritage, PseudoAtoms, Atoms):
            model.create_table(safe=True)

        pending = (
            (Fragment, fragment_dict_deque),
            (Heritage, heritage_dict_deque),
            (PseudoAtoms, pseudoatoms_dict_deque),
            (Atoms, atoms_dict_deque),
        )
        with db.atomic():
            # Nothing at all is written when no fragment was produced.
            if fragment_dict_deque:
                for model, rows in pending:
                    for ents in chunked(rows, 200):
                        model.replace_many(ents).execute()
    finally:
        # Release the connection even if one of the inserts failed.
        db.close()
    clean(output_name)

    return 1
#    uuid = CharField()
#    title = SearchField()
#    content = SearchField()
#
#    class Meta:
#        database = db



# Create the Note, NoteIndex and Tags tables. safe=True is passed so that,
# presumably, tables that already exist are left untouched (peewee's
# IF NOT EXISTS behaviour) - confirm against the ``db`` object in use.
db.create_tables([Note, NoteIndex, Tags], safe = True)

def load_json(f):
    """
    Read the file at path ``f`` and return its parsed JSON content.

    :param f: path of the JSON file to read
    :return: the decoded JSON value
    """
    # The context manager closes the handle as soon as parsing is finished
    # instead of leaving that to the garbage collector.
    with open(f, 'r') as handle:
        return json.load(handle)

# Store notes
with db.atomic():
    for notebook in iglob(libpath + "/*.qvnotebook"):
        meta = load_json(list(iglob(notebook + "/meta.json"))[0])
        nb_name = meta["name"]

        for c in iglob(notebook + "/*.qvnote/content.json"):
            meta = load_json(c.replace("content.json", "meta.json"))
            content = load_json(c)
            tagset = meta["tags"]
            full_content = ' '.join([x["data"] for x in content["cells"]])
            snippets = [x for x in content["cells"] if x["type"] == "code"]
            # Store Notes
            n = Note.create(uuid = meta["uuid"],
                 title = meta["title"],
                 notebook = nb_name,
                 last_modified = datetime.datetime.fromtimestamp(meta["updated_at"]))