예제 #1
0
#!./pyenv/bin/python
from pyquery import PyQuery as pq
import recordhelper as helper


def sanitize(cell):
    """Return the cell's text with surrounding whitespace removed.

    A cell whose ``text`` attribute is ``None`` yields an empty string.
    """
    raw = cell.text
    return "" if raw is None else raw.strip()


def process_rec(key, rec):
    """Emit the <td> cells found under the tables of ``rec`` as TSV rows.

    The cells are taken in document order and grouped 13 at a time; each
    group is sanitized, joined with tabs and emitted as one line.  A final
    group shorter than 13 cells is emitted as-is.

    Args:
        key: Record key; not used here.
        rec: Record content handed to PyQuery for parsing.
    """
    row_width = 13
    tds = pq(rec)("table")("td")
    for start in xrange(0, len(tds), row_width):
        row = tds[start : start + row_width]
        helper.emit("\t".join(sanitize(td) for td in row))


def parse_key(text):
    """Return the first whitespace-delimited token of ``text``.

    Raises:
        IndexError: if ``text`` is empty or contains only whitespace.
    """
    tokens = text.split()
    return tokens[0]


# Script entry point: hand process_rec and parse_key to recordhelper together
# with the literal "__key".  How recordhelper reads its input and uses the
# third argument is defined in that module, not here.
helper.process_records(process_rec, parse_key, "__key")
예제 #2
0
#!./pyenv/bin/python

#LAS standard spec:
#https://esd.halliburton.com/support/LSM/GGT/ProMAXSuite/ProMAX/5000/5000_8/Help/promax/las_overview.pdf

import json, las
import recordhelper as helper

def process_record(filename, record):
  """Emit one TSV line per metadata mnemonic found in a LAS record.

  Everything before the first '~' is discarded and the remainder is split on
  '~A' into a metadata part and a data part.  The metadata lines are filtered
  and sanitized through the ``las`` helpers and parsed; then, for each of the
  'V', 'W' and 'C' blocks present in the parsed metadata, every mnemonic is
  emitted as: filename, block, mnemonic, UOM, description.

  Args:
    filename: Value written in the first column of every emitted line.
    record: Text of the record.

  Returns:
    An error message string when the record is malformed, otherwise None.
  """
  start = record.find('~')
  if start < 0:
    return 'No proper start of record'

  halves = record[start:].strip().split('~A')
  if len(halves) < 2:
    return 'Improperly separated metadata & data blocks'

  # Lazily clean each metadata line before handing the stream to the parser.
  cleaned_lines = (
    las.sanitize(raw.strip('.').strip())
    for raw in las.filter_lines(halves[0], ['-'])
  )
  metadata = las.parse_metadata(cleaned_lines)
  if len(metadata['curveAliases']) == 0:
    return 'Improperly formatted metadata block'

  for block in ('V', 'W', 'C'):
    if block not in metadata:
      continue
    for mnemonic, val in metadata[block].iteritems():
      row = (filename, block, mnemonic,
             val.get('UOM', ''), val.get('description', ''))
      helper.emit('%s\t%s\t%s\t%s\t%s' % row)

# Script entry point: hand process_record and las.parse_filename to
# recordhelper together with the literal '__key'.  How recordhelper reads its
# input and uses the third argument is defined in that module, not here.
helper.process_records(process_record, las.parse_filename, '__key')
예제 #3
0
        halves[1] = halves[1][halves[1].index('\n'):]
        #filter blank and lines starting with #, split resulting text into tokens
        tokens = '\t'.join(las.filter_lines(halves[1], ['#'])).split()
    except:
        return 'bad separation between metadata and curve data'

    if len(tokens) % len(metadata['curveAliases']) != 0:
        return 'mismatched reading count'

    null_vals = get_nulls(metadata)
    curve_aliases = metadata['curveAliases']
    step_type = curve_aliases[0]
    for idx in xrange(
            0, len(tokens),
            len(curve_aliases)):  # idx is index of first reading on a step
        step_values = tokens[
            idx:idx + len(curve_aliases)]  # get all readings for the next step
        try:
            for idy, reading in enumerate(
                    filter(lambda x: float(x) not in null_vals,
                           step_values)[1:]):
                helper.emit('\t'.join([
                    filename, step_type, step_values[0], curve_aliases[idy],
                    metadata['C'][curve_aliases[idy]].get('UOM', ''), reading
                ]))
        except ValueError:
            pass


# Script entry point: hand process_record and las.parse_filename to
# recordhelper together with the literal '__key'.  How recordhelper reads its
# input and uses the third argument is defined in that module, not here.
helper.process_records(process_record, las.parse_filename, '__key')
예제 #4
0
    #Append extracted field value for each field in list of fields
    for field in fields:
        if field in text:
            vals.append(text.split(field + ': ')[1].split(' ')[0])
        else:
            vals.append('null')
    return vals


def emit(fields, cells):
    """Emit one tab-separated line: ``fields`` followed by each cell's text.

    Args:
        fields: List of already-extracted string values placed first.
        cells: Iterable of elements providing ``text_content()``.
    """
    cell_texts = [td.text_content() for td in cells]
    row = fields + cell_texts
    helper.emit('\t'.join(row))


def process_rec(key, rec):
    """Emit the cells of the third table in ``rec`` as 7-column rows.

    Records that do not contain the text 'Average PSI' are ignored.  For a
    matching record the 'File No' value is extracted from the document text
    and prepended to every emitted row.  A final group shorter than 7 cells
    is emitted as-is.

    Args:
        key: Record key; not used here.
        rec: Record content handed to PyQuery for parsing.
    """
    if 'Average PSI' not in rec:
        return

    # Parse once and reuse the document for both the text and table lookups.
    doc = pq(rec)
    fields = extract_fields(['File No'], doc.text())
    cells = doc('table').eq(2)('td')

    # Split each 7 cell values into one row and emit it.  A plain loop is used
    # because emit() is called only for its side effect.
    for start in xrange(0, len(cells), 7):
        emit(fields, cells[start:start + 7])


def parse_key(text):
    """Return the leading whitespace-delimited token of ``text``.

    Raises:
        IndexError: if ``text`` is empty or contains only whitespace.
    """
    # Only the first token is needed, so stop splitting after it.
    return text.split(None, 1)[0]


# Script entry point: hand process_rec and parse_key to recordhelper together
# with the literal '__key'.  How recordhelper reads its input and uses the
# third argument is defined in that module, not here.
helper.process_records(process_rec, parse_key, '__key')
예제 #5
0
#!./pyenv/bin/python
from pyquery import PyQuery as pq
import recordhelper as helper

def extract_fields(fields, text):
  """Extract the value following each '<field>: ' label in ``text``.

  For every name in ``fields`` the value is the first space-delimited token
  after the first '<field>: ' label.  Non-breaking spaces in that token are
  replaced with 'null'.  A field whose label does not occur yields 'null'.

  Args:
    fields: Field names to look up, without the trailing ': '.
    text: Text to search.

  Returns:
    A list with one string per entry of ``fields``, in the same order.
  """
  vals = []
  #Append extracted field value for each field in list of fields
  for field in fields:
    label = field + ': '
    # Test for the exact delimiter used by the split below; checking only the
    # bare field name raised IndexError when the name occurred without ': '.
    if label in text:
      value = text.split(label)[1].split(' ')[0]
      vals.append(value.replace(u'\xa0', 'null'))
    else:
      vals.append('null')
  return vals

def emit(fields, cells):
  """Print tab separated list of fields concatted with the cell text values.

  Args:
    fields: List of already-extracted string values placed first.
    cells: Iterable of elements providing ``text_content()``.
  """
  texts = [element.text_content() for element in cells]
  line = '\t'.join(fields + texts)
  helper.emit(line)

def process_rec(key, rec):
  """Emit the cells of the third table in ``rec`` as 9-column rows.

  Records that do not contain the text 'Vent/Flare' are ignored.  For a
  matching record the 'File No', 'Perfs', 'Spacing' and 'Total Depth' values
  are extracted from the document text and prepended to every emitted row.
  A final group shorter than 9 cells is emitted as-is.

  Args:
    key: Record key; not used here.
    rec: Record content handed to PyQuery for parsing.
  """
  if 'Vent/Flare' not in rec:
    return

  # Parse once and reuse the document for both the text and table lookups.
  doc = pq(rec)
  fields = extract_fields(['File No', 'Perfs', 'Spacing', 'Total Depth'], doc.text())
  cells = doc('table').eq(2)('td')

  #Split each 9 cell values into one row and emit record.  A plain loop is
  #used because emit() is called only for its side effect.
  for start in xrange(0, len(cells), 9):
    emit(fields, cells[start:start + 9])

def parse_key(text):
  """Return the first whitespace-delimited token of ``text``.

  Raises:
    IndexError: if ``text`` is empty or contains only whitespace.
  """
  head = text.split(None, 1)
  return head[0]

# Script entry point: read all of the input and run it through process_rec.
# parse_key and the literal '__key' are passed along to recordhelper; how it
# uses them is defined in that module, not here.
helper.process_records(process_rec, parse_key, '__key')