예제 #1
0
#!/usr/bin/env python2
# We want to know, of the files we have, what is the breakdown?

import os
import pandas
from brainbehavior.pubmed import Pubmed

# First we need to download full article text
# Create a pubmed object
email = "*****@*****.**"
pm = Pubmed(email)

# Get pubmed ids for all articles in database
pc_ids = pm.get_pubmed_central_ids()

# We are going to download them here
download_folder = "/scratch/PI/russpold/data/PUBMED/articles"

# Number of complete batches of 100 ids. Floor division is written
# explicitly (//) so that iters is always an integer usable by range(),
# instead of depending on Python 2's implicit integer division.
# NOTE(review): the len(pc_ids) % 100 leftover ids are not counted here --
# confirm the loop below handles the final partial batch.
iters = len(pc_ids) // 100

# Subset matrix to files we have downloaded: starts as an empty frame that
# shares the columns of pm.ftp
subset = pandas.DataFrame(columns=pm.ftp.columns)

for i in range(0, iters):
    print "%s of %s" % (i, iters)
    download_subfolder = "%s/%s" % (download_folder, i)
    start = i * 100
    if i != iters:
        end = start + 100
    else:
예제 #2
0
#!/usr/bin/env python2

# This script will download pubmed papers for a given start and end index in the current
# ftp manifest file
# Usage : download_pubmed_muhaha.py start end download_folder email

import sys
import pandas
from brainbehavior.pubmed import Pubmed

# Get the start and end index of ids, the download folder and the email
# from the command line. Exit with a usage message when an argument is
# missing, rather than failing with a bare IndexError.
if len(sys.argv) < 5:
    sys.exit("Usage: %s start end download_folder email" % sys.argv[0])

start = int(sys.argv[1])
end = int(sys.argv[2])
download_folder = sys.argv[3]
email = sys.argv[4]

# First we need to download full article text
# Create a pubmed object
pm = Pubmed(email)

# Get pubmed ids for articles in database
pc_ids = pm.get_pubmed_central_ids()

# Filter down to indices that we want (slice: start included, end excluded)
pc_ids = pc_ids[start:end]

# Download the articles!
pm.download_pubmed(pc_ids, download_folder)
예제 #3
0
#!/usr/bin/env python2

# This script will download a single pubmed paper, given its id
# Usage : download_pubmed_muhaha.py pmid download_folder email

import sys
import pandas
from brainbehavior.pubmed import Pubmed

# Get the article id, the download folder and the email from the command
# line. Exit with a usage message when an argument is missing, rather than
# failing with a bare IndexError.
if len(sys.argv) < 4:
    sys.exit("Usage: %s pmid download_folder email" % sys.argv[0])

pmid = sys.argv[1]
download_folder = sys.argv[2]
email = sys.argv[3]

# First we need to download full article text
# Create a pubmed object
pm = Pubmed(email)

# Download the article! (download_pubmed is given a list, so wrap the id)
pm.download_pubmed([pmid], download_folder)
예제 #4
0
#!/usr/bin/env python2
# We want to know, of the files we have, what is the breakdown?


import os
import pandas
from brainbehavior.pubmed import Pubmed

# First we need to download full article text
# Create a pubmed object
email = "*****@*****.**"
pm = Pubmed(email)

# Get pubmed ids for all articles in database
pc_ids = pm.get_pubmed_central_ids()

# We are going to download them here
download_folder = "/scratch/PI/russpold/data/PUBMED/articles"

# Number of complete batches of 100 ids. Floor division is written
# explicitly (//) so that iters is always an integer usable by range(),
# instead of depending on Python 2's implicit integer division.
# NOTE(review): the len(pc_ids) % 100 leftover ids are not counted here --
# confirm the loop below handles the final partial batch.
iters = len(pc_ids) // 100

# Subset matrix to files we have downloaded: starts as an empty frame that
# shares the columns of pm.ftp
subset = pandas.DataFrame(columns=pm.ftp.columns)

for i in range(0,iters):
    print "%s of %s" %(i,iters)
    download_subfolder = "%s/%s" %(download_folder,i)
    start = i*100
    if i != iters:
        end = start + 100
예제 #5
0
#!/usr/bin/env python2

# This script will launch instances of download_pubmed_muhaha.py

import os
import time
from brainbehavior.pubmed import Pubmed
from glob import glob

# First we need to download full article text
# Create a pubmed object
email = "*****@*****.**"
pm = Pubmed(email)

# Get pubmed ids for all articles in database
pc_ids = pm.get_pubmed_central_ids()

# Download folder
download_folder = "/scratch/PI/russpold/data/PUBMED/articles"

# Submit scripts to download in batches of 100
start = 0
# Number of complete batches. Floor division is written explicitly (//) so
# that iters is always an integer, instead of depending on Python 2's
# implicit integer division.
# NOTE(review): the len(pc_ids) % 100 leftover ids are not counted here --
# confirm the submission code handles the final partial batch.
iters = len(pc_ids) // 100

# Function to submit a single iteration of a missing job
def submit_single_missing(pmid,download_folder,email):
  jobname = "pm_%s" %(pmid)
  filey = open(".job/%s.job" % (jobname),"w")
  filey.writelines("#!/bin/bash\n")
  filey.writelines("#SBATCH --job-name=%s\n" %(jobname))
  filey.writelines("#SBATCH --output=.out/%s.out\n" %(jobname))